Snap for 8426163 from 5cfdcf077be7b3727160275e83ae6b754f01dd07 to mainline-tzdata2-release

Change-Id: I3b65f438bbe39df2585f87debc1a1e590ecb30c3
diff --git a/.appveyor.yml b/.appveyor.yml
index ea01077..a6f3c39 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -34,7 +34,7 @@
   - ps: cd OpenCL-ICD-Loader
   - ps: mkdir build
   - ps: cd build
   - cmake --build . --config %CONFIGURATION%
   - ps: cd $env:TOP
   # Get the libclcxx standard library:
diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml
deleted file mode 100644
index 0c1778e..0000000
--- a/.github/workflows/presubmit.yml
+++ /dev/null
@@ -1,41 +0,0 @@
-name: Presubmit
-on: [push, pull_request]
-  build:
-    name: Build ${{ matrix.os }} ${{ }}
-    runs-on: ${{ matrix.os }}
-    env:
-      JOB_ARCHITECTURE: ${{ matrix.arch }}
-      JOB_ENABLE_GL: ${{ }}
-    strategy:
-      matrix:
-        mainmatrix: [true]
-        os: [ubuntu-20.04, macos-11.0]
-        include:
-          - os: ubuntu-20.04
-            mainmatrix: true
-            gl: 1
-          - os: ubuntu-20.04
-            mainmatrix: false
-            name: Arm
-            arch: arm
-          - os: ubuntu-20.04
-            mainmatrix: false
-            name: AArch64
-            arch: aarch64
-    steps:
-      - uses: actions/checkout@v2
-      - name: Build
-        run: ./
-  formatcheck:
-    name: Check code format
-    runs-on: ubuntu-20.04
-    steps:
-      - name: Install packages
-        run: sudo apt install -y clang-format
-      - uses: actions/checkout@v2
-        with:
-          fetch-depth: 0
-      - name: Check code format
-        run: ./
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b826e68
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+# build directories
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..98984d8
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,22 @@
+language: cpp
+  - linux
+  - osx
+  include:
+  - os: linux
+    dist: bionic
+    env: JOB_CHECK_FORMAT=1
+    addons:
+        apt:
+          packages:
+            - clang-format-9
+  - os: linux
+    env: JOB_ARCHITECTURE=arm
+  - os: linux
+    env: JOB_ARCHITECTURE=aarch64
+  - ./
diff --git a/Android.bp b/Android.bp
deleted file mode 100644
index f48dae0..0000000
--- a/Android.bp
+++ /dev/null
@@ -1,501 +0,0 @@
-package {
-    default_applicable_licenses: ["external_OpenCL-CTS_license"],
-// Added automatically by a large-scale-change that took the approach of
-// 'apply every license found to every target'. While this makes sure we respect
-// every license restriction, it may not be entirely correct.
-// e.g. GPL in an MIT project might only apply to the contrib/ directory.
-// Please consider splitting the single license below into multiple licenses,
-// taking care not to lose any license_kind information, and overriding the
-// default license using the 'licenses: [...]' property on targets as needed.
-// For unused files, consider creating a 'fileGroup' with "//visibility:private"
-// to attach the license to, and including a comment whether the files may be
-// used in the current project.
-// See: http://go/android-license-faq
-license {
-    name: "external_OpenCL-CTS_license",
-    visibility: [":__subpackages__"],
-    license_kinds: [
-        "SPDX-license-identifier-Apache-2.0",
-        "SPDX-license-identifier-BSD",
-        "SPDX-license-identifier-MIT",
-        "SPDX-license-identifier-Unlicense",
-        "legacy_by_exception_only", // by exception only
-        "legacy_proprietary", // by exception only
-        "legacy_unencumbered",
-    ],
-    license_text: [
-        "LICENSE.txt",
-    ],
-cc_library_headers {
-    name: "ocl-harness-headers",
-    export_include_dirs: [
-        "test_common/harness",
-        "test_common"
-    ]
-cc_defaults {
-    name: "ocl-harness-defaults",
-    header_libs: [
-        "ocl-harness-headers",
-    ],
-    export_header_lib_headers: [
-        "ocl-harness-headers",
-    ],
-    cflags: [
-        "-Wno-#warnings",
-        "-Wno-absolute-value",
-        "-Wno-asm-operand-widths",
-        "-Wno-c++11-narrowing",
-        "-Wno-dangling-else",
-        "-Wno-date-time",
-        "-Wno-deprecated-declarations",
-        "-Wno-format",
-        "-Wno-ignored-pragmas",
-        "-Wno-ignored-qualifiers",
-        "-Wno-implicit-fallthrough",
-        "-Wno-logical-op-parentheses",
-        "-Wno-macro-redefined",
-        "-Wno-missing-braces",
-        "-Wno-missing-declarations",
-        "-Wno-missing-field-initializers",
-        "-Wno-non-virtual-dtor",
-        "-Wno-overloaded-virtual",
-        "-Wno-parentheses",
-        "-Wno-parentheses-equality",
-        "-Wno-reorder-ctor",
-        "-Wno-return-stack-address",
-        "-Wno-shift-negative-value",
-        "-Wno-sometimes-uninitialized",
-        "-Wno-switch",
-        "-Wno-unknown-pragmas",
-        "-Wno-unneeded-internal-declaration",
-        "-Wno-unused-function",
-        "-Wno-unused-label",
-        "-Wno-unused-parameter",
-        "-Wno-unused-variable",
-        "-Wno-writable-strings",
-        "-fexceptions",
-    ],
-    static_libs: [
-        "ocl-stubs"
-    ],
-cc_library {
-    name: "ocl-harness",
-    srcs: [ "test_common/harness/*.cpp", ],
-    defaults: [ "ocl-harness-defaults" ],
-cc_defaults {
-    name: "ocl-test-defaults",
-    defaults: [ "ocl-harness-defaults" ],
-    static_libs: [ "ocl-harness" ],
-    compile_multilib: "64",
-    multilib: {
-        lib64: {
-            suffix: "64",
-        },
-    },
-cc_defaults {
-    name: "ocl-test-image-defaults",
-    srcs: [ "test_conformance/images/common.cpp" ],
-    export_include_dirs: [ "test_conformance/images" ],
-    defaults: [ "ocl-test-defaults" ],
-cc_test {
-    name: "ocl-test-allocations",
-    srcs: [ "test_conformance/allocations/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-api",
-    srcs: [ "test_conformance/api/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-atomics",
-    srcs: [ "test_conformance/atomics/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-basic",
-    srcs: [ "test_conformance/basic/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-buffers",
-    srcs: [ "test_conformance/buffers/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-c11-atomics",
-    srcs: [ "test_conformance/c11_atomics/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-commonfns",
-    srcs: [ "test_conformance/commonfns/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-compiler",
-    srcs: [ "test_conformance/compiler/*.cpp" ],
-    data: [ "test_conformance/compiler/includeTestDirectory/testIncludeFile.h", "test_conformance/compiler/secondIncludeTestDirectory/testIncludeFile.h" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-computeinfo",
-    srcs: [ "test_conformance/computeinfo/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-contractions",
-    srcs: [ "test_conformance/contractions/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-conversions",
-    srcs: [ "test_conformance/conversions/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-device-execution",
-    srcs: [ "test_conformance/device_execution/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-device-partition",
-    srcs: [ "test_conformance/device_partition/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-device-timer",
-    srcs: [ "test_conformance/device_timer/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-events",
-    srcs: [ "test_conformance/events/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-generic-address-space",
-    srcs: [ "test_conformance/generic_address_space/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-geometrics",
-    srcs: [ "test_conformance/geometrics/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-half",
-    srcs: [ "test_conformance/half/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-integer-ops",
-    srcs: [ "test_conformance/integer_ops/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-math-brute-force",
-    srcs: [ "test_conformance/math_brute_force/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-mem-host-flags",
-    srcs: [ "test_conformance/mem_host_flags/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-multiple-device-context",
-    srcs: [ "test_conformance/multiple_device_context/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-non-uniform-work-group",
-    srcs: [ "test_conformance/non_uniform_work_group/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-pipes",
-    srcs: [ "test_conformance/pipes/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-printf",
-    srcs: [ "test_conformance/printf/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-profiling",
-    srcs: [ "test_conformance/profiling/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-relationals",
-    srcs: [ "test_conformance/relationals/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-select",
-    srcs: [ "test_conformance/select/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-spirv-new",
-    srcs: [ "test_conformance/spirv_new/*.cpp", "test_conformance/math_brute_force/reference_math.cpp", "test_conformance/math_brute_force/utility.cpp" ],
-    data: [ "test_conformance/spirv_new/spirv_asm/*", "test_conformance/spirv_new/spirv_bin/*" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-subgroups",
-    srcs: [ "test_conformance/subgroups/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-svm",
-    srcs: [ "test_conformance/SVM/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-thread-dimensions",
-    srcs: [ "test_conformance/thread_dimensions/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-vectors",
-    srcs: [ "test_conformance/vectors/*.cpp" ],
-    defaults: [ "ocl-test-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-image-clcopyimage",
-    srcs: [ "test_conformance/images/clCopyImage/*.cpp" ],
-    defaults: [ "ocl-test-image-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-image-clfillimage",
-    srcs: [ "test_conformance/images/clFillImage/*.cpp" ],
-    defaults: [ "ocl-test-image-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-image-clgetinfo",
-    srcs: [ "test_conformance/images/clGetInfo/*.cpp" ],
-    defaults: [ "ocl-test-image-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-image-clreadwriteimage",
-    srcs: [ "test_conformance/images/clReadWriteImage/*.cpp" ],
-    defaults: [ "ocl-test-image-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-image-kernel-image-methods",
-    srcs: [ "test_conformance/images/kernel_image_methods/*.cpp" ],
-    defaults: [ "ocl-test-image-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-image-kernel-read-write",
-    srcs: [ "test_conformance/images/kernel_read_write/*.cpp" ],
-    defaults: [ "ocl-test-image-defaults" ],
-    rtti: false,
-    gtest: false
-cc_test {
-    name: "ocl-test-image-samplerlessreads",
-    srcs: [ "test_conformance/images/samplerlessReads/*.cpp" ],
-    defaults: [ "ocl-test-image-defaults" ],
-    rtti: false,
-    gtest: false
-python_test_host {
-    name: "opencl_cts",
-    main: "scripts/",
-    srcs: [ "scripts/" ],
-    data: [ "scripts/test_opencl_cts.xml" ],
-    test_config: "scripts/test_opencl_cts.xml",
-    version: {
-        py2: {
-            enabled: false,
-        },
-        py3: {
-            enabled: true
-        }
-    },
-    test_options: {
-        unit_test: false,
-    },
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 083ea96..799460d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.5.1)
+cmake_minimum_required(VERSION 3.1)
@@ -17,7 +17,6 @@
@@ -60,15 +59,63 @@
 #Vendor Customization File can be included here to provide a way to automatically
 #build driver as a dependency of the conformance tests, or other such CMake customization
-include(CMakeVendor.txt OPTIONAL)
+option(USE_VENDOR_CUSTOM_FILE "Use Vendor Customization File" OFF)
+    include(CMakeVendor.txt OPTIONAL)
+# Development options for OpenCL C++ tests
+# Use OpenCL C kernels instead of OpenCL C++ kernels
+option(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS "Use OpenCL C kernels in OpenCL C++ tests" OFF)
+# Only check if OpenCL C++ kernels compile to SPIR-V
+option(CLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION "Only check if OpenCL C++ kernels compile to SPIR-V" OFF)
+    message(FATAL_ERROR "Can't use OpenCL C kernels and compile to SPIR-V.")
+  add_definitions(-DCLPP_DEVELOPMENT_OPTIONS)
+  add_definitions(${CLPP_DEVELOPMENT_OPTIONS})
+# Offline OpenCL C/C++ compiler provided by Khronos is the only supported
+# offline compiler.
+# Path to offline OpenCL C/C++ compiler provided by Khronos.
+# The spirv-1.1 branch (or a newer SPIR-V-ready branch) of that compiler
+# should be used.
+    # Additional OpenCL C/C++ compiler option.
+    message(WARNING "KHRONOS_OFFLINE_COMPILER is not defined!")
+    message(WARNING "Running CL C++ tests will not be possible.")
+# CL_LIBCLCXX_DIR - path to dir with OpenCL C++ STL (libclcxx)
 # CL_INCLUDE_DIR - path to dir with OpenCL headers
+# CL_LIB_DIR - path to dir with OpenCL library
     message(STATUS "OpenCL hasn't been found!")
-    message(FATAL_ERROR "Either install OpenCL or pass -DCL_INCLUDE_DIR and -DCL_LIB_DIR")
+    message(FATAL_ERROR "Either install OpenCL or pass -DCL_INCLUDE_DIR, -DCL_LIB_DIR and -DCL_LIBCLCXX_DIR")
 # CLConform_GL_LIBRARIES_DIR - path to OpenGL libraries
@@ -119,42 +166,23 @@
             STREQUAL "x86")
         set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse -msse2 -mfpmath=sse")
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse -msse2 -mfpmath=sse")
-        add_cxx_flag_if_supported(-frounding-math)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D__SSE__")
-    # Don't warn when using standard non-secure functions.
-    add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qlong-double -Qpc80 /DWIN32 /D_WINDOWS /W3 /GR /EHsc -nologo -Od -D_CRT_NONSTDC_NO_WARNINGS -EHsc -Wall -Qdiag-disable:68,111,177,186,161,869,1028,2259,2553,181,239,265,1188 -fp:strict -fp:source")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Qlong-double -Qpc80 /DWIN32 /D_WINDOWS /W3 /GR /EHsc -nologo -Od -D_CRT_NONSTDC_NO_WARNINGS -EHsc -Wall -Qdiag-disable:68,111,177,186,161,869,1028,2259,2553,181,239,265,1188 -fp:strict -fp:source")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qlong-double -Qpc80 /DWIN32 /D_WINDOWS /W3 /GR /EHsc -nologo -Od -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS -EHsc -Wall -Qdiag-disable:68,111,177,186,161,869,1028,2259,2553,181,239,265,1188 -fp:strict -fp:source")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Qlong-double -Qpc80 /DWIN32 /D_WINDOWS /W3 /GR /EHsc -nologo -Od -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS -EHsc -Wall -Qdiag-disable:68,111,177,186,161,869,1028,2259,2553,181,239,265,1188 -fp:strict -fp:source")
     list(APPEND CLConform_LIBRARIES m)
-    if(ANDROID OR WIN32)
-        set(LINK_PTHREAD OFF)
-    else()
-        set(LINK_PTHREAD ON)
-    endif()
+elseif(NOT WIN32)
     list(APPEND CLConform_LIBRARIES pthread)
     find_library(corefoundation CoreFoundation)
@@ -178,6 +206,7 @@
 # Support both VS2008 and VS2012.
 set (DLL_FILES "${VS_BUILD_DIR}/Debug/*.dll")
@@ -203,4 +232,17 @@
   add_custom_target( COPY_FILES${CONFORMANCE_SUFFIX} )
+# Copy required CL include directories into the build directory
+# as required for the compiler testing.
+# ... For running the compiler test on the command line.
+file(COPY "${CLConform_SOURCE_DIR}/test_conformance/compiler/includeTestDirectory" DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_conformance/compiler)
+file(COPY "${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory" DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_conformance/compiler)
+# ... For running the compiler test with VisualStudio.
+  file(COPY "${CLConform_SOURCE_DIR}/test_conformance/compiler/includeTestDirectory" DESTINATION "${CLConform_SOURCE_DIR}/build/test_conformance/compiler")
+  file(COPY "${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory" DESTINATION "${CLConform_SOURCE_DIR}/build/test_conformance/compiler")
diff --git a/CMakeVendor.txt b/CMakeVendor.txt
new file mode 100644
index 0000000..14486ec
--- /dev/null
+++ b/CMakeVendor.txt
@@ -0,0 +1,8 @@
+# We intentionally hardcode "_win32" to ensure backwards compatibility (to avoid breaking HAAVE)
+   if(ARM64_V8A)
+       set(ARCH "64")
+   else(ARM64_V8A)
+       set(ARCH "32")
+   endif(ARM64_V8A)
+endif (ANDROID)
diff --git a/METADATA b/METADATA
deleted file mode 100644
index 5c12860..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-third_party {
-  license_type: BY_EXCEPTION_ONLY
diff --git a/ b/
new file mode 100755
index 0000000..e1b8c1e
--- /dev/null
+++ b/
@@ -0,0 +1,161 @@
+# android-cmake and android-ndk based build script for conformance
+1) android-ndk version android-ndk-r10d or higher is required. Further, the environment
+   variable ANDROID_NDK should be defined to point to it.
+2) android-cmake should be installed (else the script can install it for you). If installed,
+   the environment variable ANDROID_CMAKE should point to its install location, unless it is in the current
+   working directory in which case it is picked up by default.
+3) CL_INCLUDE_DIR should be defined to point to CL headers. Alternately, this can be provided
+   as an input (-I)
+4) Path to opencl library to link against (libOpenCL) can be provided using -L. If this isn't
+   available the script will try to use CL_LIB_DIR_64 or CL_LIB_DIR_32 environment variables -
+   if available - to pick up the right library for the architecture being built.
+import os
+import sys
+import subprocess
+import argparse
+import time
+import shlex
+start  = time.time()
+script = os.path.basename( sys.argv[ 0 ] )
+def die (msg):
+    print msg
+    exit(-1)
+def execute (cmdline):
+    retcode =
+    if retcode != 0:
+        raise Exception("Failed to execute '%s', got %d" % (commandLine, retcode))
+def build(args):
+    if not (args.testDir):
+        print("building...")
+        execute("make")
+    else:
+        if os.path.exists( os.path.join(args.bld_dir, "test_conformance", args.testDir) ):
+            os.chdir( os.path.join("test_conformance",args.testDir) )
+            print("Building test: %s..." %args.testDir)
+            execute("make")
+            os.chdir(args.bld_dir)
+        else:
+            print ("Error: %s test doesn't exist" %args.testDir)
+def configure (args):
+    print("configuring...")
+    cmdline = []
+    cmdline.extend(['cmake', "-DCMAKE_TOOLCHAIN_FILE=" + os.path.join(args.android_cmake,"android.toolchain.cmake")])
+    for var in args.cmake_defs :
+        cmdline.extend([ '-D', var ])
+    cmdline.extend(['-DCL_INCLUDE_DIR=' + args.inc_dir])
+    cmdline.extend(['-DCL_LIB_DIR=' + args.lib_dir])
+    cmdline.extend(['-DANDROID_NATIVE_API_LEVEL=' + "android-21"])
+    if args.arch == "64":
+        cmdline.extend(['-DANDROID_ABI=arm64-v8a'])
+        cmdline.extend(['-DANDROID_SO_UNDEFINED=ON'])
+    cmdline.extend([args.src_dir])
+    execute(cmdline)
+def check_var (parser, args, name):
+    if not(args.__dict__[name]):
+        parser.error("%s needs to be defined" % name)
+def print_config(args):
+    print("----------CONFIGURATION--------------\n")
+    print("android_cmake: %s" % args.android_cmake)
+    print("android_ndk:   %s" % args.android_ndk)
+    print("lib_dir:       %s" % args.lib_dir)
+    print("inc_dir:       %s" % args.inc_dir)
+    if len(args.cmake_defs):
+        print("cmake options:" + "\n:".join( [ " `%s'" % dir for dir in args.cmake_defs ] ))
+    print("architecture:  %s" % args.arch)
+    print("-------------------------------------\n")
+def get_input():
+    yes = set(['yes','y', 'ye', ''])
+    no = set(['no','n'])
+    choice = raw_input().lower()
+    if choice in yes:
+        return True
+    elif choice in no:
+        return False
+    else:
+        sys.stdout.write("Please respond with 'yes' or 'no'")
+        exit()
+def install_android_cmake():
+    parser.print_help()
+    print "\nandroid-cmake doesn't seem to be installed - It should be provided as a) cmdline input b) environment variable $ANDROID_CMAKE or c) present in the current directory\n"
+    print "if you would like to download and install it in the current directory please enter yes\n"
+    print "if you would like to provide an environment variable($ANDROID_CMAKE) or command-line input(--android_cmake) rerun the script enter no\n"
+    print "input: "
+    if get_input():
+        print("installing android-cmake")
+['git', 'clone', ''])
+        # Use a newer fork of android-cmake which has been updated to support Clang. GCC is deprecated in newer NDKs and C11 atomics conformance doesn't build with NDK > 10.
+['git', 'clone', ''])
+        args.android_cmake = os.path.join(args.src_dir,"android-cmake")
+    else:
+        exit()
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--android_cmake', dest='android_cmake', default=os.environ.get('ANDROID_CMAKE'), help="Path to android-cmake (can also be set using environment variable $ANDROID_CMAKE).")
+    parser.add_argument('--android_ndk', dest='android_ndk', default=os.environ.get('ANDROID_NDK'), help="Path to android-ndk (can also be set using environment variable $ANDROID_NDK).")
+    parser.add_argument('-L','--lib_dir', dest='lib_dir', default="", help="Path to libOpenCL to link against (can also be set using environment variable $CL_LIB_DIR_32 and $CL_LIB_DIR_64).")
+    parser.add_argument('-I','--include_dir', dest='inc_dir', default=os.environ.get('CL_INCLUDE_DIR'), help="Path to headers (can also be set using environment variable $CL_INCLUDE_DIR).")
+    parser.add_argument('-D', dest='cmake_defs', action='append', default=[], help="Define CMAKE variable")
+    parser.add_argument('-a','--arch', default="32", help="Architecture to build for (32 or 64)")
+    parser.add_argument('-t','--test', dest='testDir', default="", help="Builds the given test")
+    args = parser.parse_args()
+    args.src_dir = os.path.realpath(os.path.dirname( sys.argv[ 0 ]))
+    if not (args.android_cmake):
+        if os.path.exists(os.path.join(args.src_dir,"android-cmake")):
+            args.android_cmake = os.path.join(args.src_dir,"android-cmake")
+        else:
+            install_android_cmake()
+    if not (args.lib_dir):
+        lib_var_name = "CL_LIB_DIR_" + ("32" if (args.arch == "32") else "64")
+        args.lib_dir = os.environ.get(lib_var_name)
+    check_var(parser, args, "android_cmake")
+    check_var(parser, args, "lib_dir")
+    check_var(parser, args, "inc_dir")
+    check_var(parser, args, "android_ndk")
+    print_config(args)
+    args.bld_dir = os.path.join(args.src_dir, 'bld_android_%s' % args.arch)
+    if not os.path.exists(args.bld_dir):
+        os.makedirs(args.bld_dir)
+    os.chdir(args.bld_dir)
+    configure(args)
+    build(args)
+    sys.exit( 0 )
+    finish = time.time()
+    print("Elapsed time: %.0f s." % ( finish - start ) )
diff --git a/ b/
new file mode 100755
index 0000000..f1c71f8
--- /dev/null
+++ b/
@@ -0,0 +1,6 @@
+mkdir -p build_lnx
+cd build_lnx
+make --jobs 8
diff --git a/build_win.bat b/build_win.bat
new file mode 100644
index 0000000..6ae3182
--- /dev/null
+++ b/build_win.bat
@@ -0,0 +1,32 @@
+@ECHO off
+IF DEFINED ProgramFiles(x86) SET ProgFilesDir=%ProgramFiles(x86)%
+IF NOT DEFINED ProgFilesDir SET ProgFilesDir=%ProgramFiles%
+rem -------------------------------- Update these to match what's on your PC ------------------------------------------------
+SET VCPATH="%ProgFilesDir%\Microsoft Visual Studio 14.0\Common7\IDE\"
+rem -------------------------------------------------------------------------------------------------------------------------
+call "%VS140COMNTOOLS%\vsvars32.bat"
+mkdir build_win
+pushd build_win
+IF NOT EXIST CLConform.sln (
+   echo "Solution file not found, running Cmake"
+) else (
+   echo "Solution file found CLConform.sln "
+echo Building CLConform.sln...
+%VCPATH% CLConform.sln /build
diff --git a/ b/
index 7de2bd2..7eae2fd 100755
--- a/
+++ b/
@@ -2,10 +2,12 @@
 # Arg used to specify non-'origin/master' comparison branch
-CLANG_BINARY=${2:-"`which clang-format-9`"}
 # Run git-clang-format to check for violations
-CLANG_FORMAT_OUTPUT=$(git-clang-format --diff $ORIGIN_BRANCH --extensions c,cpp,h,hpp --binary $CLANG_BINARY)
+if [ "$TRAVIS" == "true" ]; then
+    EXTRA_OPTS="--binary `which clang-format-9`"
+CLANG_FORMAT_OUTPUT=$(git-clang-format --diff $ORIGIN_BRANCH --extensions c,cpp,h,hpp $EXTRA_OPTS)
 # Check for no-ops
 grep '^no modified files to format$' <<<"$CLANG_FORMAT_OUTPUT" && exit 0
diff --git a/dependencies/Android.bp b/dependencies/Android.bp
deleted file mode 100644
index a8dbeee..0000000
--- a/dependencies/Android.bp
+++ /dev/null
@@ -1,39 +0,0 @@
-package {
-    // See: http://go/android-license-faq
-    // A large-scale-change added 'default_applicable_licenses' to import
-    // all of the 'license_kinds' from "external_OpenCL-CTS_license"
-    // to get the below license kinds:
-    //   SPDX-license-identifier-Apache-2.0
-    default_applicable_licenses: ["external_OpenCL-CTS_license"],
-cc_library_headers {
-    name: "ocl-headers",
-    export_include_dirs: [ "ocl-headers" ],
-cc_library {
-    name: "ocl-stubs",
-    srcs: [ "ocl-stubs/stubs.cpp" ],
-    cflags: [
-    ],
-    header_libs: [ "ocl-headers" ],
-    export_header_lib_headers: [ "ocl-headers" ],
-    generated_headers: [ "generated-ocl-apis" ],
-genrule {
-    name: "generated-ocl-apis",
-    out: [ "apis.h" ],
-    tool_files: [ "ocl-stubs/" ],
-    srcs: [
-        "ocl-headers/CL/cl.h",
-        "ocl-headers/CL/cl_gl.h",
-        "ocl-headers/CL/cl_egl.h",
-        "ocl-headers/CL/cl_ext.h",
-        "ocl-headers/CL/cl_gl_ext.h",
-    ],
-    cmd: "python3 $(location) $(in) > $(out)"
diff --git a/dependencies/ocl-headers/CL/cl.h b/dependencies/ocl-headers/CL/cl.h
deleted file mode 100644
index 0018a0f..0000000
--- a/dependencies/ocl-headers/CL/cl.h
+++ /dev/null
@@ -1,1929 +0,0 @@
- * Copyright (c) 2008-2020 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-#ifndef __OPENCL_CL_H
-#define __OPENCL_CL_H
-#include <CL/cl_version.h>
-#include <CL/cl_platform.h>
-#ifdef __cplusplus
-extern "C" {
-typedef struct _cl_platform_id *    cl_platform_id;
-typedef struct _cl_device_id *      cl_device_id;
-typedef struct _cl_context *        cl_context;
-typedef struct _cl_command_queue *  cl_command_queue;
-typedef struct _cl_mem *            cl_mem;
-typedef struct _cl_program *        cl_program;
-typedef struct _cl_kernel *         cl_kernel;
-typedef struct _cl_event *          cl_event;
-typedef struct _cl_sampler *        cl_sampler;
-typedef cl_uint             cl_bool;                     /* WARNING!  Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
-typedef cl_ulong            cl_bitfield;
-typedef cl_ulong            cl_properties;
-typedef cl_bitfield         cl_device_type;
-typedef cl_uint             cl_platform_info;
-typedef cl_uint             cl_device_info;
-typedef cl_bitfield         cl_device_fp_config;
-typedef cl_uint             cl_device_mem_cache_type;
-typedef cl_uint             cl_device_local_mem_type;
-typedef cl_bitfield         cl_device_exec_capabilities;
-#ifdef CL_VERSION_2_0
-typedef cl_bitfield         cl_device_svm_capabilities;
-typedef cl_bitfield         cl_command_queue_properties;
-#ifdef CL_VERSION_1_2
-typedef intptr_t            cl_device_partition_property;
-typedef cl_bitfield         cl_device_affinity_domain;
-typedef intptr_t            cl_context_properties;
-typedef cl_uint             cl_context_info;
-#ifdef CL_VERSION_2_0
-typedef cl_properties       cl_queue_properties;
-typedef cl_uint             cl_command_queue_info;
-typedef cl_uint             cl_channel_order;
-typedef cl_uint             cl_channel_type;
-typedef cl_bitfield         cl_mem_flags;
-#ifdef CL_VERSION_2_0
-typedef cl_bitfield         cl_svm_mem_flags;
-typedef cl_uint             cl_mem_object_type;
-typedef cl_uint             cl_mem_info;
-#ifdef CL_VERSION_1_2
-typedef cl_bitfield         cl_mem_migration_flags;
-typedef cl_uint             cl_image_info;
-#ifdef CL_VERSION_1_1
-typedef cl_uint             cl_buffer_create_type;
-typedef cl_uint             cl_addressing_mode;
-typedef cl_uint             cl_filter_mode;
-typedef cl_uint             cl_sampler_info;
-typedef cl_bitfield         cl_map_flags;
-#ifdef CL_VERSION_2_0
-typedef intptr_t            cl_pipe_properties;
-typedef cl_uint             cl_pipe_info;
-typedef cl_uint             cl_program_info;
-typedef cl_uint             cl_program_build_info;
-#ifdef CL_VERSION_1_2
-typedef cl_uint             cl_program_binary_type;
-typedef cl_int              cl_build_status;
-typedef cl_uint             cl_kernel_info;
-#ifdef CL_VERSION_1_2
-typedef cl_uint             cl_kernel_arg_info;
-typedef cl_uint             cl_kernel_arg_address_qualifier;
-typedef cl_uint             cl_kernel_arg_access_qualifier;
-typedef cl_bitfield         cl_kernel_arg_type_qualifier;
-typedef cl_uint             cl_kernel_work_group_info;
-#ifdef CL_VERSION_2_1
-typedef cl_uint             cl_kernel_sub_group_info;
-typedef cl_uint             cl_event_info;
-typedef cl_uint             cl_command_type;
-typedef cl_uint             cl_profiling_info;
-#ifdef CL_VERSION_2_0
-typedef cl_properties       cl_sampler_properties;
-typedef cl_uint             cl_kernel_exec_info;
-#ifdef CL_VERSION_3_0
-typedef cl_bitfield         cl_device_atomic_capabilities;
-typedef cl_bitfield         cl_device_device_enqueue_capabilities;
-typedef cl_uint             cl_khronos_vendor_id;
-typedef cl_properties       cl_mem_properties;
-typedef cl_uint             cl_version;
-typedef struct _cl_image_format {
-    cl_channel_order        image_channel_order;
-    cl_channel_type         image_channel_data_type;
-} cl_image_format;
-#ifdef CL_VERSION_1_2
-typedef struct _cl_image_desc {
-    cl_mem_object_type      image_type;
-    size_t                  image_width;
-    size_t                  image_height;
-    size_t                  image_depth;
-    size_t                  image_array_size;
-    size_t                  image_row_pitch;
-    size_t                  image_slice_pitch;
-    cl_uint                 num_mip_levels;
-    cl_uint                 num_samples;
-#ifdef CL_VERSION_2_0
-#if defined(__GNUC__)
-    __extension__                   /* Prevents warnings about anonymous union in -pedantic builds */
-#if defined(_MSC_VER) && !defined(__STDC__)
-#pragma warning( push )
-#pragma warning( disable : 4201 )   /* Prevents warning about nameless struct/union in /W4 builds */
-#if defined(_MSC_VER) && defined(__STDC__)
-    /* Anonymous unions are not supported in /Za builds */
-    union {
-      cl_mem                  buffer;
-#ifdef CL_VERSION_2_0
-#if defined(_MSC_VER) && defined(__STDC__)
-    /* Anonymous unions are not supported in /Za builds */
-      cl_mem                  mem_object;
-    };
-#if defined(_MSC_VER) && !defined(__STDC__)
-#pragma warning( pop )
-} cl_image_desc;
-#ifdef CL_VERSION_1_1
-typedef struct _cl_buffer_region {
-    size_t                  origin;
-    size_t                  size;
-} cl_buffer_region;
-#ifdef CL_VERSION_3_0
-typedef struct _cl_name_version {
-    cl_version              version;
-    char                    name[CL_NAME_VERSION_MAX_NAME_SIZE];
-} cl_name_version;
-/* Error Codes */
-#define CL_SUCCESS                                  0
-#define CL_DEVICE_NOT_FOUND                         -1
-#define CL_DEVICE_NOT_AVAILABLE                     -2
-#define CL_COMPILER_NOT_AVAILABLE                   -3
-#define CL_OUT_OF_RESOURCES                         -5
-#define CL_OUT_OF_HOST_MEMORY                       -6
-#define CL_PROFILING_INFO_NOT_AVAILABLE             -7
-#define CL_MEM_COPY_OVERLAP                         -8
-#define CL_IMAGE_FORMAT_MISMATCH                    -9
-#define CL_IMAGE_FORMAT_NOT_SUPPORTED               -10
-#define CL_BUILD_PROGRAM_FAILURE                    -11
-#define CL_MAP_FAILURE                              -12
-#ifdef CL_VERSION_1_1
-#define CL_MISALIGNED_SUB_BUFFER_OFFSET             -13
-#ifdef CL_VERSION_1_2
-#define CL_COMPILE_PROGRAM_FAILURE                  -15
-#define CL_LINKER_NOT_AVAILABLE                     -16
-#define CL_LINK_PROGRAM_FAILURE                     -17
-#define CL_DEVICE_PARTITION_FAILED                  -18
-#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE            -19
-#define CL_INVALID_VALUE                            -30
-#define CL_INVALID_DEVICE_TYPE                      -31
-#define CL_INVALID_PLATFORM                         -32
-#define CL_INVALID_DEVICE                           -33
-#define CL_INVALID_CONTEXT                          -34
-#define CL_INVALID_QUEUE_PROPERTIES                 -35
-#define CL_INVALID_COMMAND_QUEUE                    -36
-#define CL_INVALID_HOST_PTR                         -37
-#define CL_INVALID_MEM_OBJECT                       -38
-#define CL_INVALID_IMAGE_SIZE                       -40
-#define CL_INVALID_SAMPLER                          -41
-#define CL_INVALID_BINARY                           -42
-#define CL_INVALID_BUILD_OPTIONS                    -43
-#define CL_INVALID_PROGRAM                          -44
-#define CL_INVALID_PROGRAM_EXECUTABLE               -45
-#define CL_INVALID_KERNEL_NAME                      -46
-#define CL_INVALID_KERNEL_DEFINITION                -47
-#define CL_INVALID_KERNEL                           -48
-#define CL_INVALID_ARG_INDEX                        -49
-#define CL_INVALID_ARG_VALUE                        -50
-#define CL_INVALID_ARG_SIZE                         -51
-#define CL_INVALID_KERNEL_ARGS                      -52
-#define CL_INVALID_WORK_DIMENSION                   -53
-#define CL_INVALID_WORK_GROUP_SIZE                  -54
-#define CL_INVALID_WORK_ITEM_SIZE                   -55
-#define CL_INVALID_GLOBAL_OFFSET                    -56
-#define CL_INVALID_EVENT_WAIT_LIST                  -57
-#define CL_INVALID_EVENT                            -58
-#define CL_INVALID_OPERATION                        -59
-#define CL_INVALID_GL_OBJECT                        -60
-#define CL_INVALID_BUFFER_SIZE                      -61
-#define CL_INVALID_MIP_LEVEL                        -62
-#define CL_INVALID_GLOBAL_WORK_SIZE                 -63
-#ifdef CL_VERSION_1_1
-#define CL_INVALID_PROPERTY                         -64
-#ifdef CL_VERSION_1_2
-#define CL_INVALID_IMAGE_DESCRIPTOR                 -65
-#define CL_INVALID_COMPILER_OPTIONS                 -66
-#define CL_INVALID_LINKER_OPTIONS                   -67
-#ifdef CL_VERSION_2_0
-#define CL_INVALID_PIPE_SIZE                        -69
-#define CL_INVALID_DEVICE_QUEUE                     -70
-#ifdef CL_VERSION_2_2
-#define CL_INVALID_SPEC_ID                          -71
-#define CL_MAX_SIZE_RESTRICTION_EXCEEDED            -72
-/* cl_bool */
-#define CL_FALSE                                    0
-#define CL_TRUE                                     1
-#ifdef CL_VERSION_1_2
-#define CL_BLOCKING                                 CL_TRUE
-#define CL_NON_BLOCKING                             CL_FALSE
-/* cl_platform_info */
-#define CL_PLATFORM_PROFILE                         0x0900
-#define CL_PLATFORM_VERSION                         0x0901
-#define CL_PLATFORM_NAME                            0x0902
-#define CL_PLATFORM_VENDOR                          0x0903
-#define CL_PLATFORM_EXTENSIONS                      0x0904
-#ifdef CL_VERSION_2_1
-#define CL_PLATFORM_HOST_TIMER_RESOLUTION           0x0905
-#ifdef CL_VERSION_3_0
-#define CL_PLATFORM_NUMERIC_VERSION                 0x0906
-/* cl_device_type - bitfield */
-#define CL_DEVICE_TYPE_DEFAULT                      (1 << 0)
-#define CL_DEVICE_TYPE_CPU                          (1 << 1)
-#define CL_DEVICE_TYPE_GPU                          (1 << 2)
-#define CL_DEVICE_TYPE_ACCELERATOR                  (1 << 3)
-#ifdef CL_VERSION_1_2
-#define CL_DEVICE_TYPE_CUSTOM                       (1 << 4)
-#define CL_DEVICE_TYPE_ALL                          0xFFFFFFFF
-/* cl_device_info */
-#define CL_DEVICE_TYPE                                   0x1000
-#define CL_DEVICE_VENDOR_ID                              0x1001
-#define CL_DEVICE_MAX_COMPUTE_UNITS                      0x1002
-#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS               0x1003
-#define CL_DEVICE_MAX_WORK_GROUP_SIZE                    0x1004
-#define CL_DEVICE_MAX_WORK_ITEM_SIZES                    0x1005
-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR            0x1006
-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT             0x1008
-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG            0x1009
-#define CL_DEVICE_MAX_CLOCK_FREQUENCY                    0x100C
-#define CL_DEVICE_ADDRESS_BITS                           0x100D
-#define CL_DEVICE_MAX_READ_IMAGE_ARGS                    0x100E
-#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS                   0x100F
-#define CL_DEVICE_MAX_MEM_ALLOC_SIZE                     0x1010
-#define CL_DEVICE_IMAGE2D_MAX_WIDTH                      0x1011
-#define CL_DEVICE_IMAGE2D_MAX_HEIGHT                     0x1012
-#define CL_DEVICE_IMAGE3D_MAX_WIDTH                      0x1013
-#define CL_DEVICE_IMAGE3D_MAX_HEIGHT                     0x1014
-#define CL_DEVICE_IMAGE3D_MAX_DEPTH                      0x1015
-#define CL_DEVICE_IMAGE_SUPPORT                          0x1016
-#define CL_DEVICE_MAX_PARAMETER_SIZE                     0x1017
-#define CL_DEVICE_MAX_SAMPLERS                           0x1018
-#define CL_DEVICE_MEM_BASE_ADDR_ALIGN                    0x1019
-#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE               0x101A
-#define CL_DEVICE_SINGLE_FP_CONFIG                       0x101B
-#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE                  0x101C
-#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE              0x101D
-#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE                  0x101E
-#define CL_DEVICE_GLOBAL_MEM_SIZE                        0x101F
-#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE               0x1020
-#define CL_DEVICE_MAX_CONSTANT_ARGS                      0x1021
-#define CL_DEVICE_LOCAL_MEM_TYPE                         0x1022
-#define CL_DEVICE_LOCAL_MEM_SIZE                         0x1023
-#define CL_DEVICE_ERROR_CORRECTION_SUPPORT               0x1024
-#define CL_DEVICE_PROFILING_TIMER_RESOLUTION             0x1025
-#define CL_DEVICE_ENDIAN_LITTLE                          0x1026
-#define CL_DEVICE_AVAILABLE                              0x1027
-#define CL_DEVICE_COMPILER_AVAILABLE                     0x1028
-#define CL_DEVICE_EXECUTION_CAPABILITIES                 0x1029
-#define CL_DEVICE_QUEUE_PROPERTIES                       0x102A    /* deprecated */
-#ifdef CL_VERSION_2_0
-#define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES               0x102A
-#define CL_DEVICE_NAME                                   0x102B
-#define CL_DEVICE_VENDOR                                 0x102C
-#define CL_DRIVER_VERSION                                0x102D
-#define CL_DEVICE_PROFILE                                0x102E
-#define CL_DEVICE_VERSION                                0x102F
-#define CL_DEVICE_EXTENSIONS                             0x1030
-#define CL_DEVICE_PLATFORM                               0x1031
-#ifdef CL_VERSION_1_2
-#define CL_DEVICE_DOUBLE_FP_CONFIG                       0x1032
-/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG which is already defined in "cl_ext.h" */
-#ifdef CL_VERSION_1_1
-#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF            0x1034
-#define CL_DEVICE_HOST_UNIFIED_MEMORY                    0x1035   /* deprecated */
-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR               0x1036
-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT              0x1037
-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT                0x1038
-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG               0x1039
-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT              0x103A
-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE             0x103B
-#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF               0x103C
-#define CL_DEVICE_OPENCL_C_VERSION                       0x103D
-#ifdef CL_VERSION_1_2
-#define CL_DEVICE_LINKER_AVAILABLE                       0x103E
-#define CL_DEVICE_BUILT_IN_KERNELS                       0x103F
-#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE                  0x1040
-#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE                   0x1041
-#define CL_DEVICE_PARENT_DEVICE                          0x1042
-#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES              0x1043
-#define CL_DEVICE_PARTITION_PROPERTIES                   0x1044
-#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN              0x1045
-#define CL_DEVICE_PARTITION_TYPE                         0x1046
-#define CL_DEVICE_REFERENCE_COUNT                        0x1047
-#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC            0x1048
-#define CL_DEVICE_PRINTF_BUFFER_SIZE                     0x1049
-#ifdef CL_VERSION_2_0
-#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT                  0x104A
-#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS              0x104C
-#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE               0x104D
-#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES             0x104E
-#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE               0x1050
-#define CL_DEVICE_MAX_ON_DEVICE_QUEUES                   0x1051
-#define CL_DEVICE_MAX_ON_DEVICE_EVENTS                   0x1052
-#define CL_DEVICE_SVM_CAPABILITIES                       0x1053
-#define CL_DEVICE_MAX_PIPE_ARGS                          0x1055
-#define CL_DEVICE_PIPE_MAX_PACKET_SIZE                   0x1057
-#ifdef CL_VERSION_2_1
-#define CL_DEVICE_IL_VERSION                             0x105B
-#define CL_DEVICE_MAX_NUM_SUB_GROUPS                     0x105C
-#ifdef CL_VERSION_3_0
-#define CL_DEVICE_NUMERIC_VERSION                        0x105E
-#define CL_DEVICE_EXTENSIONS_WITH_VERSION                0x1060
-#define CL_DEVICE_ILS_WITH_VERSION                       0x1061
-#define CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES             0x1063
-#define CL_DEVICE_ATOMIC_FENCE_CAPABILITIES              0x1064
-#define CL_DEVICE_OPENCL_C_ALL_VERSIONS                  0x1066
-/* 0x106A to 0x106E - Reserved for upcoming KHR extension */
-#define CL_DEVICE_OPENCL_C_FEATURES                      0x106F
-#define CL_DEVICE_PIPE_SUPPORT                           0x1071
-/* cl_device_fp_config - bitfield */
-#define CL_FP_DENORM                                (1 << 0)
-#define CL_FP_INF_NAN                               (1 << 1)
-#define CL_FP_ROUND_TO_NEAREST                      (1 << 2)
-#define CL_FP_ROUND_TO_ZERO                         (1 << 3)
-#define CL_FP_ROUND_TO_INF                          (1 << 4)
-#define CL_FP_FMA                                   (1 << 5)
-#ifdef CL_VERSION_1_1
-#define CL_FP_SOFT_FLOAT                            (1 << 6)
-#ifdef CL_VERSION_1_2
-#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT         (1 << 7)
-/* cl_device_mem_cache_type */
-#define CL_NONE                                     0x0
-#define CL_READ_ONLY_CACHE                          0x1
-#define CL_READ_WRITE_CACHE                         0x2
-/* cl_device_local_mem_type */
-#define CL_LOCAL                                    0x1
-#define CL_GLOBAL                                   0x2
-/* cl_device_exec_capabilities - bitfield */
-#define CL_EXEC_KERNEL                              (1 << 0)
-#define CL_EXEC_NATIVE_KERNEL                       (1 << 1)
-/* cl_command_queue_properties - bitfield */
-#define CL_QUEUE_PROFILING_ENABLE                   (1 << 1)
-#ifdef CL_VERSION_2_0
-#define CL_QUEUE_ON_DEVICE                          (1 << 2)
-#define CL_QUEUE_ON_DEVICE_DEFAULT                  (1 << 3)
-/* cl_context_info */
-#define CL_CONTEXT_REFERENCE_COUNT                  0x1080
-#define CL_CONTEXT_DEVICES                          0x1081
-#define CL_CONTEXT_PROPERTIES                       0x1082
-#ifdef CL_VERSION_1_1
-#define CL_CONTEXT_NUM_DEVICES                      0x1083
-/* cl_context_properties */
-#define CL_CONTEXT_PLATFORM                         0x1084
-#ifdef CL_VERSION_1_2
-#define CL_CONTEXT_INTEROP_USER_SYNC                0x1085
-#ifdef CL_VERSION_1_2
-/* cl_device_partition_property */
-#define CL_DEVICE_PARTITION_EQUALLY                 0x1086
-#define CL_DEVICE_PARTITION_BY_COUNTS               0x1087
-#ifdef CL_VERSION_1_2
-/* cl_device_affinity_domain */
-#define CL_DEVICE_AFFINITY_DOMAIN_NUMA               (1 << 0)
-#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE           (1 << 1)
-#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE           (1 << 2)
-#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE           (1 << 3)
-#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE           (1 << 4)
-#ifdef CL_VERSION_2_0
-/* cl_device_svm_capabilities */
-#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER           (1 << 0)
-#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER             (1 << 1)
-#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM             (1 << 2)
-#define CL_DEVICE_SVM_ATOMICS                       (1 << 3)
-/* cl_command_queue_info */
-#define CL_QUEUE_CONTEXT                            0x1090
-#define CL_QUEUE_DEVICE                             0x1091
-#define CL_QUEUE_REFERENCE_COUNT                    0x1092
-#define CL_QUEUE_PROPERTIES                         0x1093
-#ifdef CL_VERSION_2_0
-#define CL_QUEUE_SIZE                               0x1094
-#ifdef CL_VERSION_2_1
-#define CL_QUEUE_DEVICE_DEFAULT                     0x1095
-#ifdef CL_VERSION_3_0
-#define CL_QUEUE_PROPERTIES_ARRAY                   0x1098
-/* cl_mem_flags and cl_svm_mem_flags - bitfield */
-#define CL_MEM_READ_WRITE                           (1 << 0)
-#define CL_MEM_WRITE_ONLY                           (1 << 1)
-#define CL_MEM_READ_ONLY                            (1 << 2)
-#define CL_MEM_USE_HOST_PTR                         (1 << 3)
-#define CL_MEM_ALLOC_HOST_PTR                       (1 << 4)
-#define CL_MEM_COPY_HOST_PTR                        (1 << 5)
-/* reserved                                         (1 << 6)    */
-#ifdef CL_VERSION_1_2
-#define CL_MEM_HOST_WRITE_ONLY                      (1 << 7)
-#define CL_MEM_HOST_READ_ONLY                       (1 << 8)
-#define CL_MEM_HOST_NO_ACCESS                       (1 << 9)
-#ifdef CL_VERSION_2_0
-#define CL_MEM_SVM_FINE_GRAIN_BUFFER                (1 << 10)   /* used by cl_svm_mem_flags only */
-#define CL_MEM_SVM_ATOMICS                          (1 << 11)   /* used by cl_svm_mem_flags only */
-#define CL_MEM_KERNEL_READ_AND_WRITE                (1 << 12)
-#ifdef CL_VERSION_1_2
-/* cl_mem_migration_flags - bitfield */
-#define CL_MIGRATE_MEM_OBJECT_HOST                  (1 << 0)
-/* cl_channel_order */
-#define CL_R                                        0x10B0
-#define CL_A                                        0x10B1
-#define CL_RG                                       0x10B2
-#define CL_RA                                       0x10B3
-#define CL_RGB                                      0x10B4
-#define CL_RGBA                                     0x10B5
-#define CL_BGRA                                     0x10B6
-#define CL_ARGB                                     0x10B7
-#define CL_INTENSITY                                0x10B8
-#define CL_LUMINANCE                                0x10B9
-#ifdef CL_VERSION_1_1
-#define CL_Rx                                       0x10BA
-#define CL_RGx                                      0x10BB
-#define CL_RGBx                                     0x10BC
-#ifdef CL_VERSION_1_2
-#define CL_DEPTH                                    0x10BD
-#define CL_DEPTH_STENCIL                            0x10BE
-#ifdef CL_VERSION_2_0
-#define CL_sRGB                                     0x10BF
-#define CL_sRGBx                                    0x10C0
-#define CL_sRGBA                                    0x10C1
-#define CL_sBGRA                                    0x10C2
-#define CL_ABGR                                     0x10C3
-/* cl_channel_type */
-#define CL_SNORM_INT8                               0x10D0
-#define CL_SNORM_INT16                              0x10D1
-#define CL_UNORM_INT8                               0x10D2
-#define CL_UNORM_INT16                              0x10D3
-#define CL_UNORM_SHORT_565                          0x10D4
-#define CL_UNORM_SHORT_555                          0x10D5
-#define CL_UNORM_INT_101010                         0x10D6
-#define CL_SIGNED_INT8                              0x10D7
-#define CL_SIGNED_INT16                             0x10D8
-#define CL_SIGNED_INT32                             0x10D9
-#define CL_UNSIGNED_INT8                            0x10DA
-#define CL_UNSIGNED_INT16                           0x10DB
-#define CL_UNSIGNED_INT32                           0x10DC
-#define CL_HALF_FLOAT                               0x10DD
-#define CL_FLOAT                                    0x10DE
-#ifdef CL_VERSION_1_2
-#define CL_UNORM_INT24                              0x10DF
-#ifdef CL_VERSION_2_1
-#define CL_UNORM_INT_101010_2                       0x10E0
-/* cl_mem_object_type */
-#define CL_MEM_OBJECT_BUFFER                        0x10F0
-#define CL_MEM_OBJECT_IMAGE2D                       0x10F1
-#define CL_MEM_OBJECT_IMAGE3D                       0x10F2
-#ifdef CL_VERSION_1_2
-#define CL_MEM_OBJECT_IMAGE2D_ARRAY                 0x10F3
-#define CL_MEM_OBJECT_IMAGE1D                       0x10F4
-#define CL_MEM_OBJECT_IMAGE1D_ARRAY                 0x10F5
-#define CL_MEM_OBJECT_IMAGE1D_BUFFER                0x10F6
-#ifdef CL_VERSION_2_0
-#define CL_MEM_OBJECT_PIPE                          0x10F7
-/* cl_mem_info */
-#define CL_MEM_TYPE                                 0x1100
-#define CL_MEM_FLAGS                                0x1101
-#define CL_MEM_SIZE                                 0x1102
-#define CL_MEM_HOST_PTR                             0x1103
-#define CL_MEM_MAP_COUNT                            0x1104
-#define CL_MEM_REFERENCE_COUNT                      0x1105
-#define CL_MEM_CONTEXT                              0x1106
-#ifdef CL_VERSION_1_1
-#define CL_MEM_ASSOCIATED_MEMOBJECT                 0x1107
-#define CL_MEM_OFFSET                               0x1108
-#ifdef CL_VERSION_2_0
-#define CL_MEM_USES_SVM_POINTER                     0x1109
-#ifdef CL_VERSION_3_0
-#define CL_MEM_PROPERTIES                           0x110A
-/* cl_image_info */
-#define CL_IMAGE_FORMAT                             0x1110
-#define CL_IMAGE_ELEMENT_SIZE                       0x1111
-#define CL_IMAGE_ROW_PITCH                          0x1112
-#define CL_IMAGE_SLICE_PITCH                        0x1113
-#define CL_IMAGE_WIDTH                              0x1114
-#define CL_IMAGE_HEIGHT                             0x1115
-#define CL_IMAGE_DEPTH                              0x1116
-#ifdef CL_VERSION_1_2
-#define CL_IMAGE_ARRAY_SIZE                         0x1117
-#define CL_IMAGE_BUFFER                             0x1118
-#define CL_IMAGE_NUM_MIP_LEVELS                     0x1119
-#define CL_IMAGE_NUM_SAMPLES                        0x111A
-/* cl_pipe_info */
-#ifdef CL_VERSION_2_0
-#define CL_PIPE_PACKET_SIZE                         0x1120
-#define CL_PIPE_MAX_PACKETS                         0x1121
-#ifdef CL_VERSION_3_0
-#define CL_PIPE_PROPERTIES                          0x1122
-/* cl_addressing_mode */
-#define CL_ADDRESS_NONE                             0x1130
-#define CL_ADDRESS_CLAMP_TO_EDGE                    0x1131
-#define CL_ADDRESS_CLAMP                            0x1132
-#define CL_ADDRESS_REPEAT                           0x1133
-#ifdef CL_VERSION_1_1
-#define CL_ADDRESS_MIRRORED_REPEAT                  0x1134
-/* cl_filter_mode */
-#define CL_FILTER_NEAREST                           0x1140
-#define CL_FILTER_LINEAR                            0x1141
-/* cl_sampler_info */
-#define CL_SAMPLER_REFERENCE_COUNT                  0x1150
-#define CL_SAMPLER_CONTEXT                          0x1151
-#define CL_SAMPLER_NORMALIZED_COORDS                0x1152
-#define CL_SAMPLER_ADDRESSING_MODE                  0x1153
-#define CL_SAMPLER_FILTER_MODE                      0x1154
-#ifdef CL_VERSION_2_0
-/* These enumerants are for the cl_khr_mipmap_image extension.
-   They have since been added to cl_ext.h with an appropriate
-   KHR suffix, but are left here for backwards compatibility. */
-#define CL_SAMPLER_MIP_FILTER_MODE                  0x1155
-#define CL_SAMPLER_LOD_MIN                          0x1156
-#define CL_SAMPLER_LOD_MAX                          0x1157
-#ifdef CL_VERSION_3_0
-#define CL_SAMPLER_PROPERTIES                       0x1158
-/* cl_map_flags - bitfield */
-#define CL_MAP_READ                                 (1 << 0)
-#define CL_MAP_WRITE                                (1 << 1)
-#ifdef CL_VERSION_1_2
-#define CL_MAP_WRITE_INVALIDATE_REGION              (1 << 2)
-/* cl_program_info */
-#define CL_PROGRAM_REFERENCE_COUNT                  0x1160
-#define CL_PROGRAM_CONTEXT                          0x1161
-#define CL_PROGRAM_NUM_DEVICES                      0x1162
-#define CL_PROGRAM_DEVICES                          0x1163
-#define CL_PROGRAM_SOURCE                           0x1164
-#define CL_PROGRAM_BINARY_SIZES                     0x1165
-#define CL_PROGRAM_BINARIES                         0x1166
-#ifdef CL_VERSION_1_2
-#define CL_PROGRAM_NUM_KERNELS                      0x1167
-#define CL_PROGRAM_KERNEL_NAMES                     0x1168
-#ifdef CL_VERSION_2_1
-#define CL_PROGRAM_IL                               0x1169
-#ifdef CL_VERSION_2_2
-/* cl_program_build_info */
-#define CL_PROGRAM_BUILD_STATUS                     0x1181
-#define CL_PROGRAM_BUILD_OPTIONS                    0x1182
-#define CL_PROGRAM_BUILD_LOG                        0x1183
-#ifdef CL_VERSION_1_2
-#define CL_PROGRAM_BINARY_TYPE                      0x1184
-#ifdef CL_VERSION_2_0
-#ifdef CL_VERSION_1_2
-/* cl_program_binary_type */
-#define CL_PROGRAM_BINARY_TYPE_NONE                 0x0
-#define CL_PROGRAM_BINARY_TYPE_LIBRARY              0x2
-/* cl_build_status */
-#define CL_BUILD_SUCCESS                            0
-#define CL_BUILD_NONE                               -1
-#define CL_BUILD_ERROR                              -2
-#define CL_BUILD_IN_PROGRESS                        -3
-/* cl_kernel_info */
-#define CL_KERNEL_FUNCTION_NAME                     0x1190
-#define CL_KERNEL_NUM_ARGS                          0x1191
-#define CL_KERNEL_REFERENCE_COUNT                   0x1192
-#define CL_KERNEL_CONTEXT                           0x1193
-#define CL_KERNEL_PROGRAM                           0x1194
-#ifdef CL_VERSION_1_2
-#define CL_KERNEL_ATTRIBUTES                        0x1195
-#ifdef CL_VERSION_1_2
-/* cl_kernel_arg_info */
-#define CL_KERNEL_ARG_ADDRESS_QUALIFIER             0x1196
-#define CL_KERNEL_ARG_ACCESS_QUALIFIER              0x1197
-#define CL_KERNEL_ARG_TYPE_NAME                     0x1198
-#define CL_KERNEL_ARG_TYPE_QUALIFIER                0x1199
-#define CL_KERNEL_ARG_NAME                          0x119A
-#ifdef CL_VERSION_1_2
-/* cl_kernel_arg_address_qualifier */
-#define CL_KERNEL_ARG_ADDRESS_GLOBAL                0x119B
-#define CL_KERNEL_ARG_ADDRESS_LOCAL                 0x119C
-#define CL_KERNEL_ARG_ADDRESS_CONSTANT              0x119D
-#define CL_KERNEL_ARG_ADDRESS_PRIVATE               0x119E
-#ifdef CL_VERSION_1_2
-/* cl_kernel_arg_access_qualifier */
-#define CL_KERNEL_ARG_ACCESS_READ_ONLY              0x11A0
-#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY             0x11A1
-#define CL_KERNEL_ARG_ACCESS_READ_WRITE             0x11A2
-#define CL_KERNEL_ARG_ACCESS_NONE                   0x11A3
-#ifdef CL_VERSION_1_2
-/* cl_kernel_arg_type_qualifier */
-#define CL_KERNEL_ARG_TYPE_NONE                     0
-#define CL_KERNEL_ARG_TYPE_CONST                    (1 << 0)
-#define CL_KERNEL_ARG_TYPE_RESTRICT                 (1 << 1)
-#define CL_KERNEL_ARG_TYPE_VOLATILE                 (1 << 2)
-#ifdef CL_VERSION_2_0
-#define CL_KERNEL_ARG_TYPE_PIPE                     (1 << 3)
-/* cl_kernel_work_group_info */
-#define CL_KERNEL_WORK_GROUP_SIZE                   0x11B0
-#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE           0x11B1
-#define CL_KERNEL_LOCAL_MEM_SIZE                    0x11B2
-#define CL_KERNEL_PRIVATE_MEM_SIZE                  0x11B4
-#ifdef CL_VERSION_1_2
-#define CL_KERNEL_GLOBAL_WORK_SIZE                  0x11B5
-#ifdef CL_VERSION_2_1
-/* cl_kernel_sub_group_info */
-#define CL_KERNEL_MAX_NUM_SUB_GROUPS                0x11B9
-#define CL_KERNEL_COMPILE_NUM_SUB_GROUPS            0x11BA
-#ifdef CL_VERSION_2_0
-/* cl_kernel_exec_info */
-#define CL_KERNEL_EXEC_INFO_SVM_PTRS                0x11B6
-/* cl_event_info */
-#define CL_EVENT_COMMAND_QUEUE                      0x11D0
-#define CL_EVENT_COMMAND_TYPE                       0x11D1
-#define CL_EVENT_REFERENCE_COUNT                    0x11D2
-#ifdef CL_VERSION_1_1
-#define CL_EVENT_CONTEXT                            0x11D4
-/* cl_command_type */
-#define CL_COMMAND_NDRANGE_KERNEL                   0x11F0
-#define CL_COMMAND_TASK                             0x11F1
-#define CL_COMMAND_NATIVE_KERNEL                    0x11F2
-#define CL_COMMAND_READ_BUFFER                      0x11F3
-#define CL_COMMAND_WRITE_BUFFER                     0x11F4
-#define CL_COMMAND_COPY_BUFFER                      0x11F5
-#define CL_COMMAND_READ_IMAGE                       0x11F6
-#define CL_COMMAND_WRITE_IMAGE                      0x11F7
-#define CL_COMMAND_COPY_IMAGE                       0x11F8
-#define CL_COMMAND_COPY_IMAGE_TO_BUFFER             0x11F9
-#define CL_COMMAND_COPY_BUFFER_TO_IMAGE             0x11FA
-#define CL_COMMAND_MAP_BUFFER                       0x11FB
-#define CL_COMMAND_MAP_IMAGE                        0x11FC
-#define CL_COMMAND_UNMAP_MEM_OBJECT                 0x11FD
-#define CL_COMMAND_MARKER                           0x11FE
-#define CL_COMMAND_ACQUIRE_GL_OBJECTS               0x11FF
-#define CL_COMMAND_RELEASE_GL_OBJECTS               0x1200
-#ifdef CL_VERSION_1_1
-#define CL_COMMAND_READ_BUFFER_RECT                 0x1201
-#define CL_COMMAND_WRITE_BUFFER_RECT                0x1202
-#define CL_COMMAND_COPY_BUFFER_RECT                 0x1203
-#define CL_COMMAND_USER                             0x1204
-#ifdef CL_VERSION_1_2
-#define CL_COMMAND_BARRIER                          0x1205
-#define CL_COMMAND_MIGRATE_MEM_OBJECTS              0x1206
-#define CL_COMMAND_FILL_BUFFER                      0x1207
-#define CL_COMMAND_FILL_IMAGE                       0x1208
-#ifdef CL_VERSION_2_0
-#define CL_COMMAND_SVM_FREE                         0x1209
-#define CL_COMMAND_SVM_MEMCPY                       0x120A
-#define CL_COMMAND_SVM_MEMFILL                      0x120B
-#define CL_COMMAND_SVM_MAP                          0x120C
-#define CL_COMMAND_SVM_UNMAP                        0x120D
-#ifdef CL_VERSION_3_0
-#define CL_COMMAND_SVM_MIGRATE_MEM                  0x120E
-/* command execution status */
-#define CL_COMPLETE                                 0x0
-#define CL_RUNNING                                  0x1
-#define CL_SUBMITTED                                0x2
-#define CL_QUEUED                                   0x3
-/* cl_buffer_create_type */
-#ifdef CL_VERSION_1_1
-#define CL_BUFFER_CREATE_TYPE_REGION                0x1220
-/* cl_profiling_info */
-#define CL_PROFILING_COMMAND_QUEUED                 0x1280
-#define CL_PROFILING_COMMAND_SUBMIT                 0x1281
-#define CL_PROFILING_COMMAND_START                  0x1282
-#define CL_PROFILING_COMMAND_END                    0x1283
-#ifdef CL_VERSION_2_0
-#define CL_PROFILING_COMMAND_COMPLETE               0x1284
-/* cl_device_atomic_capabilities - bitfield */
-#ifdef CL_VERSION_3_0
-#define CL_DEVICE_ATOMIC_ORDER_RELAXED          (1 << 0)
-#define CL_DEVICE_ATOMIC_ORDER_ACQ_REL          (1 << 1)
-#define CL_DEVICE_ATOMIC_ORDER_SEQ_CST          (1 << 2)
-#define CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM        (1 << 3)
-#define CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP       (1 << 4)
-#define CL_DEVICE_ATOMIC_SCOPE_DEVICE           (1 << 5)
-/* cl_device_device_enqueue_capabilities - bitfield */
-#ifdef CL_VERSION_3_0
-#define CL_DEVICE_QUEUE_SUPPORTED               (1 << 0)
-/* cl_khronos_vendor_id */
-#define CL_KHRONOS_VENDOR_ID_CODEPLAY               0x10004
-#ifdef CL_VERSION_3_0
-/* cl_version */
-#define CL_VERSION_MAJOR(version) \
-#define CL_VERSION_MINOR(version) \
-#define CL_VERSION_PATCH(version) ((version) & CL_VERSION_PATCH_MASK)
-#define CL_MAKE_VERSION(major, minor, patch)                      \
-  ((((major) & CL_VERSION_MAJOR_MASK)                             \
-   ((patch) & CL_VERSION_PATCH_MASK))
-/* Platform API */
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetPlatformIDs(cl_uint          num_entries,
-                 cl_platform_id * platforms,
-                 cl_uint *        num_platforms) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetPlatformInfo(cl_platform_id   platform,
-                  cl_platform_info param_name,
-                  size_t           param_value_size,
-                  void *           param_value,
-                  size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-/* Device APIs */
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetDeviceIDs(cl_platform_id   platform,
-               cl_device_type   device_type,
-               cl_uint          num_entries,
-               cl_device_id *   devices,
-               cl_uint *        num_devices) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetDeviceInfo(cl_device_id    device,
-                cl_device_info  param_name,
-                size_t          param_value_size,
-                void *          param_value,
-                size_t *        param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_1_2
-extern CL_API_ENTRY cl_int CL_API_CALL
-clCreateSubDevices(cl_device_id                         in_device,
-                   const cl_device_partition_property * properties,
-                   cl_uint                              num_devices,
-                   cl_device_id *                       out_devices,
-                   cl_uint *                            num_devices_ret) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2;
-#ifdef CL_VERSION_2_1
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetDefaultDeviceCommandQueue(cl_context           context,
-                               cl_device_id         device,
-                               cl_command_queue     command_queue) CL_API_SUFFIX__VERSION_2_1;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetDeviceAndHostTimer(cl_device_id    device,
-                        cl_ulong*       device_timestamp,
-                        cl_ulong*       host_timestamp) CL_API_SUFFIX__VERSION_2_1;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetHostTimer(cl_device_id device,
-               cl_ulong *   host_timestamp) CL_API_SUFFIX__VERSION_2_1;
-/* Context APIs */
-extern CL_API_ENTRY cl_context CL_API_CALL
-clCreateContext(const cl_context_properties * properties,
-                cl_uint              num_devices,
-                const cl_device_id * devices,
-                void (CL_CALLBACK * pfn_notify)(const char * errinfo,
-                                                const void * private_info,
-                                                size_t       cb,
-                                                void *       user_data),
-                void *               user_data,
-                cl_int *             errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_context CL_API_CALL
-clCreateContextFromType(const cl_context_properties * properties,
-                        cl_device_type      device_type,
-                        void (CL_CALLBACK * pfn_notify)(const char * errinfo,
-                                                        const void * private_info,
-                                                        size_t       cb,
-                                                        void *       user_data),
-                        void *              user_data,
-                        cl_int *            errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainContext(cl_context context) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseContext(cl_context context) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetContextInfo(cl_context         context,
-                 cl_context_info    param_name,
-                 size_t             param_value_size,
-                 void *             param_value,
-                 size_t *           param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_3_0
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetContextDestructorCallback(cl_context         context,
-                               void (CL_CALLBACK* pfn_notify)(cl_context context,
-                                                              void* user_data),
-                               void*              user_data) CL_API_SUFFIX__VERSION_3_0;
-/* Command Queue APIs */
-#ifdef CL_VERSION_2_0
-extern CL_API_ENTRY cl_command_queue CL_API_CALL
-clCreateCommandQueueWithProperties(cl_context               context,
-                                   cl_device_id             device,
-                                   const cl_queue_properties *    properties,
-                                   cl_int *                 errcode_ret) CL_API_SUFFIX__VERSION_2_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetCommandQueueInfo(cl_command_queue      command_queue,
-                      cl_command_queue_info param_name,
-                      size_t                param_value_size,
-                      void *                param_value,
-                      size_t *              param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-/* Memory Object APIs */
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateBuffer(cl_context   context,
-               cl_mem_flags flags,
-               size_t       size,
-               void *       host_ptr,
-               cl_int *     errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_1_1
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateSubBuffer(cl_mem                   buffer,
-                  cl_mem_flags             flags,
-                  cl_buffer_create_type    buffer_create_type,
-                  const void *             buffer_create_info,
-                  cl_int *                 errcode_ret) CL_API_SUFFIX__VERSION_1_1;
-#ifdef CL_VERSION_1_2
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateImage(cl_context              context,
-              cl_mem_flags            flags,
-              const cl_image_format * image_format,
-              const cl_image_desc *   image_desc,
-              void *                  host_ptr,
-              cl_int *                errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-#ifdef CL_VERSION_2_0
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreatePipe(cl_context                 context,
-             cl_mem_flags               flags,
-             cl_uint                    pipe_packet_size,
-             cl_uint                    pipe_max_packets,
-             const cl_pipe_properties * properties,
-             cl_int *                   errcode_ret) CL_API_SUFFIX__VERSION_2_0;
-#ifdef CL_VERSION_3_0
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateBufferWithProperties(cl_context                context,
-                             const cl_mem_properties * properties,
-                             cl_mem_flags              flags,
-                             size_t                    size,
-                             void *                    host_ptr,
-                             cl_int *                  errcode_ret) CL_API_SUFFIX__VERSION_3_0;
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateImageWithProperties(cl_context                context,
-                            const cl_mem_properties * properties,
-                            cl_mem_flags              flags,
-                            const cl_image_format *   image_format,
-                            const cl_image_desc *     image_desc,
-                            void *                    host_ptr,
-                            cl_int *                  errcode_ret) CL_API_SUFFIX__VERSION_3_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetSupportedImageFormats(cl_context           context,
-                           cl_mem_flags         flags,
-                           cl_mem_object_type   image_type,
-                           cl_uint              num_entries,
-                           cl_image_format *    image_formats,
-                           cl_uint *            num_image_formats) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetMemObjectInfo(cl_mem           memobj,
-                   cl_mem_info      param_name,
-                   size_t           param_value_size,
-                   void *           param_value,
-                   size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetImageInfo(cl_mem           image,
-               cl_image_info    param_name,
-               size_t           param_value_size,
-               void *           param_value,
-               size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_2_0
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetPipeInfo(cl_mem           pipe,
-              cl_pipe_info     param_name,
-              size_t           param_value_size,
-              void *           param_value,
-              size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_2_0;
-#ifdef CL_VERSION_1_1
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetMemObjectDestructorCallback(cl_mem memobj,
-                                 void (CL_CALLBACK * pfn_notify)(cl_mem memobj,
-                                                                 void * user_data),
-                                 void * user_data) CL_API_SUFFIX__VERSION_1_1;
-/* SVM Allocation APIs */
-#ifdef CL_VERSION_2_0
-extern CL_API_ENTRY void * CL_API_CALL
-clSVMAlloc(cl_context       context,
-           cl_svm_mem_flags flags,
-           size_t           size,
-           cl_uint          alignment) CL_API_SUFFIX__VERSION_2_0;
-clSVMFree(cl_context        context,
-          void *            svm_pointer) CL_API_SUFFIX__VERSION_2_0;
-/* Sampler APIs */
-#ifdef CL_VERSION_2_0
-extern CL_API_ENTRY cl_sampler CL_API_CALL
-clCreateSamplerWithProperties(cl_context                     context,
-                              const cl_sampler_properties *  sampler_properties,
-                              cl_int *                       errcode_ret) CL_API_SUFFIX__VERSION_2_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetSamplerInfo(cl_sampler         sampler,
-                 cl_sampler_info    param_name,
-                 size_t             param_value_size,
-                 void *             param_value,
-                 size_t *           param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-/* Program Object APIs */
-extern CL_API_ENTRY cl_program CL_API_CALL
-clCreateProgramWithSource(cl_context        context,
-                          cl_uint           count,
-                          const char **     strings,
-                          const size_t *    lengths,
-                          cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_program CL_API_CALL
-clCreateProgramWithBinary(cl_context                     context,
-                          cl_uint                        num_devices,
-                          const cl_device_id *           device_list,
-                          const size_t *                 lengths,
-                          const unsigned char **         binaries,
-                          cl_int *                       binary_status,
-                          cl_int *                       errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_1_2
-extern CL_API_ENTRY cl_program CL_API_CALL
-clCreateProgramWithBuiltInKernels(cl_context            context,
-                                  cl_uint               num_devices,
-                                  const cl_device_id *  device_list,
-                                  const char *          kernel_names,
-                                  cl_int *              errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-#ifdef CL_VERSION_2_1
-extern CL_API_ENTRY cl_program CL_API_CALL
-clCreateProgramWithIL(cl_context    context,
-                     const void*    il,
-                     size_t         length,
-                     cl_int*        errcode_ret) CL_API_SUFFIX__VERSION_2_1;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clBuildProgram(cl_program           program,
-               cl_uint              num_devices,
-               const cl_device_id * device_list,
-               const char *         options,
-               void (CL_CALLBACK *  pfn_notify)(cl_program program,
-                                                void * user_data),
-               void *               user_data) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_1_2
-extern CL_API_ENTRY cl_int CL_API_CALL
-clCompileProgram(cl_program           program,
-                 cl_uint              num_devices,
-                 const cl_device_id * device_list,
-                 const char *         options,
-                 cl_uint              num_input_headers,
-                 const cl_program *   input_headers,
-                 const char **        header_include_names,
-                 void (CL_CALLBACK *  pfn_notify)(cl_program program,
-                                                  void * user_data),
-                 void *               user_data) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_program CL_API_CALL
-clLinkProgram(cl_context           context,
-              cl_uint              num_devices,
-              const cl_device_id * device_list,
-              const char *         options,
-              cl_uint              num_input_programs,
-              const cl_program *   input_programs,
-              void (CL_CALLBACK *  pfn_notify)(cl_program program,
-                                               void * user_data),
-              void *               user_data,
-              cl_int *             errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-#ifdef CL_VERSION_2_2
-clSetProgramReleaseCallback(cl_program          program,
-                            void (CL_CALLBACK * pfn_notify)(cl_program program,
-                                                            void * user_data),
-                            void *              user_data) CL_API_SUFFIX__VERSION_2_2_DEPRECATED;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetProgramSpecializationConstant(cl_program  program,
-                                   cl_uint     spec_id,
-                                   size_t      spec_size,
-                                   const void* spec_value) CL_API_SUFFIX__VERSION_2_2;
-#ifdef CL_VERSION_1_2
-extern CL_API_ENTRY cl_int CL_API_CALL
-clUnloadPlatformCompiler(cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetProgramInfo(cl_program         program,
-                 cl_program_info    param_name,
-                 size_t             param_value_size,
-                 void *             param_value,
-                 size_t *           param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetProgramBuildInfo(cl_program            program,
-                      cl_device_id          device,
-                      cl_program_build_info param_name,
-                      size_t                param_value_size,
-                      void *                param_value,
-                      size_t *              param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-/* Kernel Object APIs */
-extern CL_API_ENTRY cl_kernel CL_API_CALL
-clCreateKernel(cl_program      program,
-               const char *    kernel_name,
-               cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clCreateKernelsInProgram(cl_program     program,
-                         cl_uint        num_kernels,
-                         cl_kernel *    kernels,
-                         cl_uint *      num_kernels_ret) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_2_1
-extern CL_API_ENTRY cl_kernel CL_API_CALL
-clCloneKernel(cl_kernel     source_kernel,
-              cl_int*       errcode_ret) CL_API_SUFFIX__VERSION_2_1;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainKernel(cl_kernel    kernel) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseKernel(cl_kernel   kernel) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetKernelArg(cl_kernel    kernel,
-               cl_uint      arg_index,
-               size_t       arg_size,
-               const void * arg_value) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_2_0
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetKernelArgSVMPointer(cl_kernel    kernel,
-                         cl_uint      arg_index,
-                         const void * arg_value) CL_API_SUFFIX__VERSION_2_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetKernelExecInfo(cl_kernel            kernel,
-                    cl_kernel_exec_info  param_name,
-                    size_t               param_value_size,
-                    const void *         param_value) CL_API_SUFFIX__VERSION_2_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetKernelInfo(cl_kernel       kernel,
-                cl_kernel_info  param_name,
-                size_t          param_value_size,
-                void *          param_value,
-                size_t *        param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_1_2
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetKernelArgInfo(cl_kernel       kernel,
-                   cl_uint         arg_indx,
-                   cl_kernel_arg_info  param_name,
-                   size_t          param_value_size,
-                   void *          param_value,
-                   size_t *        param_value_size_ret) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetKernelWorkGroupInfo(cl_kernel                  kernel,
-                         cl_device_id               device,
-                         cl_kernel_work_group_info  param_name,
-                         size_t                     param_value_size,
-                         void *                     param_value,
-                         size_t *                   param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_2_1
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetKernelSubGroupInfo(cl_kernel                   kernel,
-                        cl_device_id                device,
-                        cl_kernel_sub_group_info    param_name,
-                        size_t                      input_value_size,
-                        const void*                 input_value,
-                        size_t                      param_value_size,
-                        void*                       param_value,
-                        size_t*                     param_value_size_ret) CL_API_SUFFIX__VERSION_2_1;
-/* Event Object APIs */
-extern CL_API_ENTRY cl_int CL_API_CALL
-clWaitForEvents(cl_uint             num_events,
-                const cl_event *    event_list) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetEventInfo(cl_event         event,
-               cl_event_info    param_name,
-               size_t           param_value_size,
-               void *           param_value,
-               size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_1_1
-extern CL_API_ENTRY cl_event CL_API_CALL
-clCreateUserEvent(cl_context    context,
-                  cl_int *      errcode_ret) CL_API_SUFFIX__VERSION_1_1;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_1_1
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetUserEventStatus(cl_event   event,
-                     cl_int     execution_status) CL_API_SUFFIX__VERSION_1_1;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetEventCallback(cl_event    event,
-                   cl_int      command_exec_callback_type,
-                   void (CL_CALLBACK * pfn_notify)(cl_event event,
-                                                   cl_int   event_command_status,
-                                                   void *   user_data),
-                   void *      user_data) CL_API_SUFFIX__VERSION_1_1;
-/* Profiling APIs */
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetEventProfilingInfo(cl_event            event,
-                        cl_profiling_info   param_name,
-                        size_t              param_value_size,
-                        void *              param_value,
-                        size_t *            param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-/* Flush and Finish APIs */
-extern CL_API_ENTRY cl_int CL_API_CALL
-clFlush(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clFinish(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0;
-/* Enqueued Commands APIs */
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueReadBuffer(cl_command_queue    command_queue,
-                    cl_mem              buffer,
-                    cl_bool             blocking_read,
-                    size_t              offset,
-                    size_t              size,
-                    void *              ptr,
-                    cl_uint             num_events_in_wait_list,
-                    const cl_event *    event_wait_list,
-                    cl_event *          event) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_1_1
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueReadBufferRect(cl_command_queue    command_queue,
-                        cl_mem              buffer,
-                        cl_bool             blocking_read,
-                        const size_t *      buffer_origin,
-                        const size_t *      host_origin,
-                        const size_t *      region,
-                        size_t              buffer_row_pitch,
-                        size_t              buffer_slice_pitch,
-                        size_t              host_row_pitch,
-                        size_t              host_slice_pitch,
-                        void *              ptr,
-                        cl_uint             num_events_in_wait_list,
-                        const cl_event *    event_wait_list,
-                        cl_event *          event) CL_API_SUFFIX__VERSION_1_1;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueWriteBuffer(cl_command_queue   command_queue,
-                     cl_mem             buffer,
-                     cl_bool            blocking_write,
-                     size_t             offset,
-                     size_t             size,
-                     const void *       ptr,
-                     cl_uint            num_events_in_wait_list,
-                     const cl_event *   event_wait_list,
-                     cl_event *         event) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_1_1
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueWriteBufferRect(cl_command_queue    command_queue,
-                         cl_mem              buffer,
-                         cl_bool             blocking_write,
-                         const size_t *      buffer_origin,
-                         const size_t *      host_origin,
-                         const size_t *      region,
-                         size_t              buffer_row_pitch,
-                         size_t              buffer_slice_pitch,
-                         size_t              host_row_pitch,
-                         size_t              host_slice_pitch,
-                         const void *        ptr,
-                         cl_uint             num_events_in_wait_list,
-                         const cl_event *    event_wait_list,
-                         cl_event *          event) CL_API_SUFFIX__VERSION_1_1;
-#ifdef CL_VERSION_1_2
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueFillBuffer(cl_command_queue   command_queue,
-                    cl_mem             buffer,
-                    const void *       pattern,
-                    size_t             pattern_size,
-                    size_t             offset,
-                    size_t             size,
-                    cl_uint            num_events_in_wait_list,
-                    const cl_event *   event_wait_list,
-                    cl_event *         event) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueCopyBuffer(cl_command_queue    command_queue,
-                    cl_mem              src_buffer,
-                    cl_mem              dst_buffer,
-                    size_t              src_offset,
-                    size_t              dst_offset,
-                    size_t              size,
-                    cl_uint             num_events_in_wait_list,
-                    const cl_event *    event_wait_list,
-                    cl_event *          event) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_1_1
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueCopyBufferRect(cl_command_queue    command_queue,
-                        cl_mem              src_buffer,
-                        cl_mem              dst_buffer,
-                        const size_t *      src_origin,
-                        const size_t *      dst_origin,
-                        const size_t *      region,
-                        size_t              src_row_pitch,
-                        size_t              src_slice_pitch,
-                        size_t              dst_row_pitch,
-                        size_t              dst_slice_pitch,
-                        cl_uint             num_events_in_wait_list,
-                        const cl_event *    event_wait_list,
-                        cl_event *          event) CL_API_SUFFIX__VERSION_1_1;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueReadImage(cl_command_queue     command_queue,
-                   cl_mem               image,
-                   cl_bool              blocking_read,
-                   const size_t *       origin,
-                   const size_t *       region,
-                   size_t               row_pitch,
-                   size_t               slice_pitch,
-                   void *               ptr,
-                   cl_uint              num_events_in_wait_list,
-                   const cl_event *     event_wait_list,
-                   cl_event *           event) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueWriteImage(cl_command_queue    command_queue,
-                    cl_mem              image,
-                    cl_bool             blocking_write,
-                    const size_t *      origin,
-                    const size_t *      region,
-                    size_t              input_row_pitch,
-                    size_t              input_slice_pitch,
-                    const void *        ptr,
-                    cl_uint             num_events_in_wait_list,
-                    const cl_event *    event_wait_list,
-                    cl_event *          event) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_1_2
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueFillImage(cl_command_queue   command_queue,
-                   cl_mem             image,
-                   const void *       fill_color,
-                   const size_t *     origin,
-                   const size_t *     region,
-                   cl_uint            num_events_in_wait_list,
-                   const cl_event *   event_wait_list,
-                   cl_event *         event) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueCopyImage(cl_command_queue     command_queue,
-                   cl_mem               src_image,
-                   cl_mem               dst_image,
-                   const size_t *       src_origin,
-                   const size_t *       dst_origin,
-                   const size_t *       region,
-                   cl_uint              num_events_in_wait_list,
-                   const cl_event *     event_wait_list,
-                   cl_event *           event) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueCopyImageToBuffer(cl_command_queue command_queue,
-                           cl_mem           src_image,
-                           cl_mem           dst_buffer,
-                           const size_t *   src_origin,
-                           const size_t *   region,
-                           size_t           dst_offset,
-                           cl_uint          num_events_in_wait_list,
-                           const cl_event * event_wait_list,
-                           cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueCopyBufferToImage(cl_command_queue command_queue,
-                           cl_mem           src_buffer,
-                           cl_mem           dst_image,
-                           size_t           src_offset,
-                           const size_t *   dst_origin,
-                           const size_t *   region,
-                           cl_uint          num_events_in_wait_list,
-                           const cl_event * event_wait_list,
-                           cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY void * CL_API_CALL
-clEnqueueMapBuffer(cl_command_queue command_queue,
-                   cl_mem           buffer,
-                   cl_bool          blocking_map,
-                   cl_map_flags     map_flags,
-                   size_t           offset,
-                   size_t           size,
-                   cl_uint          num_events_in_wait_list,
-                   const cl_event * event_wait_list,
-                   cl_event *       event,
-                   cl_int *         errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY void * CL_API_CALL
-clEnqueueMapImage(cl_command_queue  command_queue,
-                  cl_mem            image,
-                  cl_bool           blocking_map,
-                  cl_map_flags      map_flags,
-                  const size_t *    origin,
-                  const size_t *    region,
-                  size_t *          image_row_pitch,
-                  size_t *          image_slice_pitch,
-                  cl_uint           num_events_in_wait_list,
-                  const cl_event *  event_wait_list,
-                  cl_event *        event,
-                  cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueUnmapMemObject(cl_command_queue command_queue,
-                        cl_mem           memobj,
-                        void *           mapped_ptr,
-                        cl_uint          num_events_in_wait_list,
-                        const cl_event * event_wait_list,
-                        cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_1_2
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMigrateMemObjects(cl_command_queue       command_queue,
-                           cl_uint                num_mem_objects,
-                           const cl_mem *         mem_objects,
-                           cl_mem_migration_flags flags,
-                           cl_uint                num_events_in_wait_list,
-                           const cl_event *       event_wait_list,
-                           cl_event *             event) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueNDRangeKernel(cl_command_queue command_queue,
-                       cl_kernel        kernel,
-                       cl_uint          work_dim,
-                       const size_t *   global_work_offset,
-                       const size_t *   global_work_size,
-                       const size_t *   local_work_size,
-                       cl_uint          num_events_in_wait_list,
-                       const cl_event * event_wait_list,
-                       cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueNativeKernel(cl_command_queue  command_queue,
-                      void (CL_CALLBACK * user_func)(void *),
-                      void *            args,
-                      size_t            cb_args,
-                      cl_uint           num_mem_objects,
-                      const cl_mem *    mem_list,
-                      const void **     args_mem_loc,
-                      cl_uint           num_events_in_wait_list,
-                      const cl_event *  event_wait_list,
-                      cl_event *        event) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_1_2
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMarkerWithWaitList(cl_command_queue  command_queue,
-                            cl_uint           num_events_in_wait_list,
-                            const cl_event *  event_wait_list,
-                            cl_event *        event) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueBarrierWithWaitList(cl_command_queue  command_queue,
-                             cl_uint           num_events_in_wait_list,
-                             const cl_event *  event_wait_list,
-                             cl_event *        event) CL_API_SUFFIX__VERSION_1_2;
-#ifdef CL_VERSION_2_0
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMFree(cl_command_queue  command_queue,
-                 cl_uint           num_svm_pointers,
-                 void *            svm_pointers[],
-                 void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue,
-                                                    cl_uint          num_svm_pointers,
-                                                    void *           svm_pointers[],
-                                                    void *           user_data),
-                 void *            user_data,
-                 cl_uint           num_events_in_wait_list,
-                 const cl_event *  event_wait_list,
-                 cl_event *        event) CL_API_SUFFIX__VERSION_2_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMMemcpy(cl_command_queue  command_queue,
-                   cl_bool           blocking_copy,
-                   void *            dst_ptr,
-                   const void *      src_ptr,
-                   size_t            size,
-                   cl_uint           num_events_in_wait_list,
-                   const cl_event *  event_wait_list,
-                   cl_event *        event) CL_API_SUFFIX__VERSION_2_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMMemFill(cl_command_queue  command_queue,
-                    void *            svm_ptr,
-                    const void *      pattern,
-                    size_t            pattern_size,
-                    size_t            size,
-                    cl_uint           num_events_in_wait_list,
-                    const cl_event *  event_wait_list,
-                    cl_event *        event) CL_API_SUFFIX__VERSION_2_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMMap(cl_command_queue  command_queue,
-                cl_bool           blocking_map,
-                cl_map_flags      flags,
-                void *            svm_ptr,
-                size_t            size,
-                cl_uint           num_events_in_wait_list,
-                const cl_event *  event_wait_list,
-                cl_event *        event) CL_API_SUFFIX__VERSION_2_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMUnmap(cl_command_queue  command_queue,
-                  void *            svm_ptr,
-                  cl_uint           num_events_in_wait_list,
-                  const cl_event *  event_wait_list,
-                  cl_event *        event) CL_API_SUFFIX__VERSION_2_0;
-#ifdef CL_VERSION_2_1
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMMigrateMem(cl_command_queue         command_queue,
-                       cl_uint                  num_svm_pointers,
-                       const void **            svm_pointers,
-                       const size_t *           sizes,
-                       cl_mem_migration_flags   flags,
-                       cl_uint                  num_events_in_wait_list,
-                       const cl_event *         event_wait_list,
-                       cl_event *               event) CL_API_SUFFIX__VERSION_2_1;
-#ifdef CL_VERSION_1_2
-/* Extension function access
- *
- * Returns the extension function address for the given function name,
- * or NULL if a valid function can not be found.  The client must
- * check to make sure the address is not NULL, before using or
- * calling the returned function address.
- */
-extern CL_API_ENTRY void * CL_API_CALL
-clGetExtensionFunctionAddressForPlatform(cl_platform_id platform,
-                                         const char *   func_name) CL_API_SUFFIX__VERSION_1_2;
-    /*
-     *  WARNING:
-     *     This API introduces mutable state into the OpenCL implementation. It has been REMOVED
-     *  to better facilitate thread safety.  The 1.0 API is not thread safe. It is not tested by the
-     *  OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
-     *  It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
-     *
-     *  Software developers previously relying on this API are instructed to set the command queue
-     *  properties when creating the queue, instead.
-     */
-    extern CL_API_ENTRY cl_int CL_API_CALL
-    clSetCommandQueueProperty(cl_command_queue              command_queue,
-                              cl_command_queue_properties   properties,
-                              cl_bool                       enable,
-                              cl_command_queue_properties * old_properties) CL_API_SUFFIX__VERSION_1_0_DEPRECATED;
-/* Deprecated OpenCL 1.1 APIs */
-clCreateImage2D(cl_context              context,
-                cl_mem_flags            flags,
-                const cl_image_format * image_format,
-                size_t                  image_width,
-                size_t                  image_height,
-                size_t                  image_row_pitch,
-                void *                  host_ptr,
-                cl_int *                errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-clCreateImage3D(cl_context              context,
-                cl_mem_flags            flags,
-                const cl_image_format * image_format,
-                size_t                  image_width,
-                size_t                  image_height,
-                size_t                  image_depth,
-                size_t                  image_row_pitch,
-                size_t                  image_slice_pitch,
-                void *                  host_ptr,
-                cl_int *                errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-clEnqueueMarker(cl_command_queue    command_queue,
-                cl_event *          event) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-clEnqueueWaitForEvents(cl_command_queue  command_queue,
-                        cl_uint          num_events,
-                        const cl_event * event_list) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-clEnqueueBarrier(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-clGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-/* Deprecated OpenCL 2.0 APIs */
-clCreateCommandQueue(cl_context                     context,
-                     cl_device_id                   device,
-                     cl_command_queue_properties    properties,
-                     cl_int *                       errcode_ret) CL_API_SUFFIX__VERSION_1_2_DEPRECATED;
-clCreateSampler(cl_context          context,
-                cl_bool             normalized_coords,
-                cl_addressing_mode  addressing_mode,
-                cl_filter_mode      filter_mode,
-                cl_int *            errcode_ret) CL_API_SUFFIX__VERSION_1_2_DEPRECATED;
-clEnqueueTask(cl_command_queue  command_queue,
-              cl_kernel         kernel,
-              cl_uint           num_events_in_wait_list,
-              const cl_event *  event_wait_list,
-              cl_event *        event) CL_API_SUFFIX__VERSION_1_2_DEPRECATED;
-#ifdef __cplusplus
-#endif  /* __OPENCL_CL_H */
diff --git a/dependencies/ocl-headers/CL/cl_egl.h b/dependencies/ocl-headers/CL/cl_egl.h
deleted file mode 100644
index 357a37c..0000000
--- a/dependencies/ocl-headers/CL/cl_egl.h
+++ /dev/null
@@ -1,120 +0,0 @@
- * Copyright (c) 2008-2020 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-#ifndef __OPENCL_CL_EGL_H
-#define __OPENCL_CL_EGL_H
-#include <CL/cl.h>
-#ifdef __cplusplus
-extern "C" {
-/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
-/* Error type for clCreateFromEGLImageKHR */
-#define CL_INVALID_EGL_OBJECT_KHR             -1093
-/* CLeglImageKHR is an opaque handle to an EGLImage */
-typedef void* CLeglImageKHR;
-/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
-typedef void* CLeglDisplayKHR;
-/* CLeglSyncKHR is an opaque handle to an EGLSync object */
-typedef void* CLeglSyncKHR;
-/* properties passed to clCreateFromEGLImageKHR */
-typedef intptr_t cl_egl_image_properties_khr;
-#define cl_khr_egl_image 1
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromEGLImageKHR(cl_context                  context,
-                        CLeglDisplayKHR             egldisplay,
-                        CLeglImageKHR               eglimage,
-                        cl_mem_flags                flags,
-                        const cl_egl_image_properties_khr * properties,
-                        cl_int *                    errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
-    cl_context                  context,
-    CLeglDisplayKHR             egldisplay,
-    CLeglImageKHR               eglimage,
-    cl_mem_flags                flags,
-    const cl_egl_image_properties_khr * properties,
-    cl_int *                    errcode_ret);
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue,
-                              cl_uint          num_objects,
-                              const cl_mem *   mem_objects,
-                              cl_uint          num_events_in_wait_list,
-                              const cl_event * event_wait_list,
-                              cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
-    cl_command_queue command_queue,
-    cl_uint          num_objects,
-    const cl_mem *   mem_objects,
-    cl_uint          num_events_in_wait_list,
-    const cl_event * event_wait_list,
-    cl_event *       event);
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue,
-                              cl_uint          num_objects,
-                              const cl_mem *   mem_objects,
-                              cl_uint          num_events_in_wait_list,
-                              const cl_event * event_wait_list,
-                              cl_event *       event) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
-    cl_command_queue command_queue,
-    cl_uint          num_objects,
-    const cl_mem *   mem_objects,
-    cl_uint          num_events_in_wait_list,
-    const cl_event * event_wait_list,
-    cl_event *       event);
-#define cl_khr_egl_event 1
-extern CL_API_ENTRY cl_event CL_API_CALL
-clCreateEventFromEGLSyncKHR(cl_context      context,
-                            CLeglSyncKHR    sync,
-                            CLeglDisplayKHR display,
-                            cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
-    cl_context      context,
-    CLeglSyncKHR    sync,
-    CLeglDisplayKHR display,
-    cl_int *        errcode_ret);
-#ifdef __cplusplus
-#endif /* __OPENCL_CL_EGL_H */
diff --git a/dependencies/ocl-headers/CL/cl_ext.h b/dependencies/ocl-headers/CL/cl_ext.h
deleted file mode 100644
index 80a81de..0000000
--- a/dependencies/ocl-headers/CL/cl_ext.h
+++ /dev/null
@@ -1,1708 +0,0 @@
- * Copyright (c) 2008-2020 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-/* cl_ext.h contains OpenCL extensions which don't have external */
-/* (OpenGL, D3D) dependencies.                                   */
-#ifndef __CL_EXT_H
-#define __CL_EXT_H
-#ifdef __cplusplus
-extern "C" {
-#include <CL/cl.h>
-/* cl_khr_fp64 extension - no extension #define since it has no functions  */
-/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in CL.h for OpenCL >= 120 */
-#define CL_DEVICE_DOUBLE_FP_CONFIG                       0x1032
-/* cl_khr_fp16 extension - no extension #define since it has no functions  */
-#define CL_DEVICE_HALF_FP_CONFIG                    0x1033
-/* Memory object destruction
- *
- * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
- *
- * Registers a user callback function that will be called when the memory object is deleted and its resources
- * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
- * stack associated with memobj. The registered user callback functions are called in the reverse order in
- * which they were registered. The user callback functions are called and then the memory object is deleted
- * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
- * notified when the memory referenced by host_ptr, specified when the memory object is created and used as
- * the storage bits for the memory object, can be reused or freed.
- *
- * The application may not call CL api's with the cl_mem object passed to the pfn_notify.
- *
- * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
- * before using.
- */
-#define cl_APPLE_SetMemObjectDestructor 1
-extern CL_API_ENTRY cl_int CL_API_CALL clSetMemObjectDestructorAPPLE(  cl_mem memobj,
-                                        void (* pfn_notify)(cl_mem memobj, void * user_data),
-                                        void * user_data)             CL_API_SUFFIX__VERSION_1_0;
-/* Context Logging Functions
- *
- * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
- * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
- * before using.
- *
- * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger
- */
-#define cl_APPLE_ContextLoggingFunctions 1
-extern CL_API_ENTRY void CL_API_CALL clLogMessagesToSystemLogAPPLE(  const char * errstr,
-                                            const void * private_info,
-                                            size_t       cb,
-                                            void *       user_data)  CL_API_SUFFIX__VERSION_1_0;
-/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
-extern CL_API_ENTRY void CL_API_CALL clLogMessagesToStdoutAPPLE(   const char * errstr,
-                                          const void * private_info,
-                                          size_t       cb,
-                                          void *       user_data)    CL_API_SUFFIX__VERSION_1_0;
-/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
-extern CL_API_ENTRY void CL_API_CALL clLogMessagesToStderrAPPLE(   const char * errstr,
-                                          const void * private_info,
-                                          size_t       cb,
-                                          void *       user_data)    CL_API_SUFFIX__VERSION_1_0;
-* cl_khr_icd extension *
-#define cl_khr_icd 1
-/* cl_platform_info                                                        */
-#define CL_PLATFORM_ICD_SUFFIX_KHR                  0x0920
-/* Additional Error Codes                                                  */
-#define CL_PLATFORM_NOT_FOUND_KHR                   -1001
-extern CL_API_ENTRY cl_int CL_API_CALL
-clIcdGetPlatformIDsKHR(cl_uint          num_entries,
-                       cl_platform_id * platforms,
-                       cl_uint *        num_platforms);
-typedef cl_int
-(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint          num_entries,
-                                         cl_platform_id * platforms,
-                                         cl_uint *        num_platforms);
- * cl_khr_il_program extension *
- *******************************/
-#define cl_khr_il_program 1
-/* New property to clGetDeviceInfo for retrieving supported intermediate
- * languages
- */
-#define CL_DEVICE_IL_VERSION_KHR                    0x105B
-/* New property to clGetProgramInfo for retrieving for retrieving the IL of a
- * program
- */
-#define CL_PROGRAM_IL_KHR                           0x1169
-extern CL_API_ENTRY cl_program CL_API_CALL
-clCreateProgramWithILKHR(cl_context   context,
-                         const void * il,
-                         size_t       length,
-                         cl_int *     errcode_ret);
-typedef cl_program
-(CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context   context,
-                                           const void * il,
-                                           size_t       length,
-                                           cl_int *     errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-/* Extension: cl_khr_image2d_from_buffer
- *
- * This extension allows a 2D image to be created from a cl_mem buffer without
- * a copy. The type associated with a 2D image created from a buffer in an
- * OpenCL program is image2d_t. Both the sampler and sampler-less read_image
- * built-in functions are supported for 2D images and 2D images created from
- * a buffer.  Similarly, the write_image built-ins are also supported for 2D
- * images created from a buffer.
- *
- * When the 2D image from buffer is created, the client must specify the
- * width, height, image format (i.e. channel order and channel data type)
- * and optionally the row pitch.
- *
- * The pitch specified must be a multiple of
- * The base address of the buffer must be aligned to
- */
-#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR              0x104A
- * cl_khr_initialize_memory extension *
- **************************************/
-#define CL_CONTEXT_MEMORY_INITIALIZE_KHR            0x2030
- * cl_khr_terminate_context extension *
- **************************************/
-#define CL_CONTEXT_TERMINATED_KHR                   -1121
-#define CL_CONTEXT_TERMINATE_KHR                    0x2032
-#define cl_khr_terminate_context 1
-extern CL_API_ENTRY cl_int CL_API_CALL
-clTerminateContextKHR(cl_context context) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int
-(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_API_SUFFIX__VERSION_1_2;
- * Extension: cl_khr_spir
- *
- * This extension adds support to create an OpenCL program object from a
- * Standard Portable Intermediate Representation (SPIR) instance
- */
-#define CL_DEVICE_SPIR_VERSIONS                     0x40E0
- * cl_khr_create_command_queue extension *
- *****************************************/
-#define cl_khr_create_command_queue 1
-typedef cl_properties cl_queue_properties_khr;
-extern CL_API_ENTRY cl_command_queue CL_API_CALL
-clCreateCommandQueueWithPropertiesKHR(cl_context context,
-                                      cl_device_id device,
-                                      const cl_queue_properties_khr* properties,
-                                      cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_command_queue
-(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context,
-                                                        cl_device_id device,
-                                                        const cl_queue_properties_khr* properties,
-                                                        cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-* cl_nv_device_attribute_query extension *
-/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
-#define CL_DEVICE_REGISTERS_PER_BLOCK_NV            0x4002
-#define CL_DEVICE_WARP_SIZE_NV                      0x4003
-#define CL_DEVICE_GPU_OVERLAP_NV                    0x4004
-#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV            0x4005
-#define CL_DEVICE_INTEGRATED_MEMORY_NV              0x4006
-* cl_amd_device_attribute_query *
-#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD            0x4036
-#define CL_DEVICE_TOPOLOGY_AMD                          0x4037
-#define CL_DEVICE_BOARD_NAME_AMD                        0x4038
-#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD                0x4039
-#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD             0x4040
-#define CL_DEVICE_SIMD_WIDTH_AMD                        0x4041
-#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD            0x4042
-#define CL_DEVICE_WAVEFRONT_WIDTH_AMD                   0x4043
-#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD               0x4044
-#define CL_DEVICE_LOCAL_MEM_BANKS_AMD                   0x4048
-#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD            0x4049
-#define CL_DEVICE_GFXIP_MAJOR_AMD                       0x404A
-#define CL_DEVICE_GFXIP_MINOR_AMD                       0x404B
-#define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD               0x4031
-#define CL_DEVICE_PCIE_ID_AMD                           0x4034
-* cl_arm_printf extension
-#define CL_PRINTF_CALLBACK_ARM                      0x40B0
-#define CL_PRINTF_BUFFERSIZE_ARM                    0x40B1
-* cl_ext_device_fission extension
-#define cl_ext_device_fission   1
-extern CL_API_ENTRY cl_int CL_API_CALL
-clReleaseDeviceEXT(cl_device_id device) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_int
-(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_API_SUFFIX__VERSION_1_1;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clRetainDeviceEXT(cl_device_id device) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_int
-(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_ulong  cl_device_partition_property_ext;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clCreateSubDevicesEXT(cl_device_id   in_device,
-                      const cl_device_partition_property_ext * properties,
-                      cl_uint        num_entries,
-                      cl_device_id * out_devices,
-                      cl_uint *      num_devices) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_int
-(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id   in_device,
-                                         const cl_device_partition_property_ext * properties,
-                                         cl_uint        num_entries,
-                                         cl_device_id * out_devices,
-                                         cl_uint *      num_devices) CL_API_SUFFIX__VERSION_1_1;
-/* cl_device_partition_property_ext */
-#define CL_DEVICE_PARTITION_EQUALLY_EXT             0x4050
-#define CL_DEVICE_PARTITION_BY_COUNTS_EXT           0x4051
-#define CL_DEVICE_PARTITION_BY_NAMES_EXT            0x4052
-/* clDeviceGetInfo selectors */
-#define CL_DEVICE_PARENT_DEVICE_EXT                 0x4054
-#define CL_DEVICE_PARTITION_TYPES_EXT               0x4055
-#define CL_DEVICE_AFFINITY_DOMAINS_EXT              0x4056
-#define CL_DEVICE_REFERENCE_COUNT_EXT               0x4057
-#define CL_DEVICE_PARTITION_STYLE_EXT               0x4058
-/* error codes */
-#define CL_DEVICE_PARTITION_FAILED_EXT              -1057
-#define CL_INVALID_PARTITION_COUNT_EXT              -1058
-#define CL_INVALID_PARTITION_NAME_EXT               -1059
-#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT             0x1
-#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT             0x2
-#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT             0x3
-#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT             0x4
-#define CL_AFFINITY_DOMAIN_NUMA_EXT                 0x10
-/* cl_device_partition_property_ext list terminators */
-#define CL_PROPERTIES_LIST_END_EXT                  ((cl_device_partition_property_ext) 0)
-#define CL_PARTITION_BY_COUNTS_LIST_END_EXT         ((cl_device_partition_property_ext) 0)
-#define CL_PARTITION_BY_NAMES_LIST_END_EXT          ((cl_device_partition_property_ext) 0 - 1)
- * cl_ext_migrate_memobject extension definitions
- ***********************************/
-#define cl_ext_migrate_memobject 1
-typedef cl_bitfield cl_mem_migration_flags_ext;
-#define CL_MIGRATE_MEM_OBJECT_HOST_EXT              0x1
-#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT           0x4040
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue,
-                             cl_uint          num_mem_objects,
-                             const cl_mem *   mem_objects,
-                             cl_mem_migration_flags_ext flags,
-                             cl_uint          num_events_in_wait_list,
-                             const cl_event * event_wait_list,
-                             cl_event *       event);
-typedef cl_int
-(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue,
-                                               cl_uint          num_mem_objects,
-                                               const cl_mem *   mem_objects,
-                                               cl_mem_migration_flags_ext flags,
-                                               cl_uint          num_events_in_wait_list,
-                                               const cl_event * event_wait_list,
-                                               cl_event *       event);
-* cl_ext_cxx_for_opencl extension
-#define cl_ext_cxx_for_opencl 1
-* cl_qcom_ext_host_ptr extension
-#define cl_qcom_ext_host_ptr 1
-#define CL_MEM_EXT_HOST_PTR_QCOM                  (1 << 29)
-#define CL_DEVICE_PAGE_SIZE_QCOM                  0x40A1
-#define CL_IMAGE_ROW_ALIGNMENT_QCOM               0x40A2
-#define CL_IMAGE_SLICE_ALIGNMENT_QCOM             0x40A3
-#define CL_MEM_HOST_UNCACHED_QCOM                 0x40A4
-#define CL_MEM_HOST_WRITEBACK_QCOM                0x40A5
-#define CL_MEM_HOST_WRITETHROUGH_QCOM             0x40A6
-#define CL_MEM_HOST_WRITE_COMBINING_QCOM          0x40A7
-typedef cl_uint                                   cl_image_pitch_info_qcom;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetDeviceImageInfoQCOM(cl_device_id             device,
-                         size_t                   image_width,
-                         size_t                   image_height,
-                         const cl_image_format   *image_format,
-                         cl_image_pitch_info_qcom param_name,
-                         size_t                   param_value_size,
-                         void                    *param_value,
-                         size_t                  *param_value_size_ret);
-typedef struct _cl_mem_ext_host_ptr
-    /* Type of external memory allocation. */
-    /* Legal values will be defined in layered extensions. */
-    cl_uint  allocation_type;
-    /* Host cache policy for this external memory allocation. */
-    cl_uint  host_cache_policy;
-} cl_mem_ext_host_ptr;
-* cl_qcom_ext_host_ptr_iocoherent extension
-/* Cache policy specifying io-coherence */
-#define CL_MEM_HOST_IOCOHERENT_QCOM               0x40A9
-* cl_qcom_ion_host_ptr extension
-#define CL_MEM_ION_HOST_PTR_QCOM                  0x40A8
-typedef struct _cl_mem_ion_host_ptr
-    /* Type of external memory allocation. */
-    /* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
-    cl_mem_ext_host_ptr  ext_host_ptr;
-    /* ION file descriptor */
-    int                  ion_filedesc;
-    /* Host pointer to the ION allocated memory */
-    void*                ion_hostptr;
-} cl_mem_ion_host_ptr;
-* cl_qcom_android_native_buffer_host_ptr extension
-#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM                  0x40C6
-typedef struct _cl_mem_android_native_buffer_host_ptr
-    /* Type of external memory allocation. */
-    /* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. */
-    cl_mem_ext_host_ptr  ext_host_ptr;
-    /* Virtual pointer to the android native buffer */
-    void*                anb_ptr;
-} cl_mem_android_native_buffer_host_ptr;
- * cl_img_yuv_image extension *
- ******************************************/
-/* Image formats used in clCreateImage */
-#define CL_NV21_IMG                                 0x40D0
-#define CL_YV12_IMG                                 0x40D1
- * cl_img_cached_allocations extension *
- ******************************************/
-/* Flag values used by clCreateBuffer */
-#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG          (1 << 26)
-#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG            (1 << 27)
- * cl_img_use_gralloc_ptr extension *
- ******************************************/
-#define cl_img_use_gralloc_ptr 1
-/* Flag values used by clCreateBuffer */
-#define CL_MEM_USE_GRALLOC_PTR_IMG                  (1 << 28)
-/* To be used by clGetEventInfo: */
-/* Error codes from clEnqueueAcquireGrallocObjectsIMG and clEnqueueReleaseGrallocObjectsIMG */
-#define CL_INVALID_GRALLOC_OBJECT_IMG               0x40D5
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueAcquireGrallocObjectsIMG(cl_command_queue      command_queue,
-                                  cl_uint               num_objects,
-                                  const cl_mem *        mem_objects,
-                                  cl_uint               num_events_in_wait_list,
-                                  const cl_event *      event_wait_list,
-                                  cl_event *            event) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueReleaseGrallocObjectsIMG(cl_command_queue      command_queue,
-                                  cl_uint               num_objects,
-                                  const cl_mem *        mem_objects,
-                                  cl_uint               num_events_in_wait_list,
-                                  const cl_event *      event_wait_list,
-                                  cl_event *            event) CL_API_SUFFIX__VERSION_1_2;
- * cl_img_generate_mipmap extension *
- ******************************************/
-#define cl_img_generate_mipmap 1
-typedef cl_uint cl_mipmap_filter_mode_img;
-/* To be used by clEnqueueGenerateMipmapIMG */
-/* To be used by clGetEventInfo */
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueGenerateMipmapIMG(cl_command_queue          command_queue,
-                           cl_mem                    src_image,
-                           cl_mem                    dst_image,
-                           cl_mipmap_filter_mode_img mipmap_filter_mode,
-                           const size_t              *array_region,
-                           const size_t              *mip_region,
-                           cl_uint                   num_events_in_wait_list,
-                           const cl_event            *event_wait_list,
-                           cl_event *event) CL_API_SUFFIX__VERSION_1_2;
- * cl_img_mem_properties extension *
- ******************************************/
-#define cl_img_mem_properties 1
-/* To be used by clCreateBufferWithProperties */
-#define CL_MEM_ALLOC_FLAGS_IMG 0x40D7
-/* To be used wiith the CL_MEM_ALLOC_FLAGS_IMG property */
-typedef cl_bitfield cl_mem_alloc_flags_img;
-/* To be used with cl_mem_alloc_flags_img */
-* cl_khr_subgroups extension
-#define cl_khr_subgroups 1
-#if !defined(CL_VERSION_2_1)
-/* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in CL.h.
-   In hindsight, there should have been a khr suffix on this type for
-   the extension, but keeping it un-suffixed to maintain backwards
-   compatibility. */
-typedef cl_uint             cl_kernel_sub_group_info;
-/* cl_kernel_sub_group_info */
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetKernelSubGroupInfoKHR(cl_kernel    in_kernel,
-                           cl_device_id in_device,
-                           cl_kernel_sub_group_info param_name,
-                           size_t       input_value_size,
-                           const void * input_value,
-                           size_t       param_value_size,
-                           void *       param_value,
-                           size_t *     param_value_size_ret) CL_API_SUFFIX__VERSION_2_0_DEPRECATED;
-typedef cl_int
-(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel    in_kernel,
-                                              cl_device_id in_device,
-                                              cl_kernel_sub_group_info param_name,
-                                              size_t       input_value_size,
-                                              const void * input_value,
-                                              size_t       param_value_size,
-                                              void *       param_value,
-                                              size_t *     param_value_size_ret) CL_API_SUFFIX__VERSION_2_0_DEPRECATED;
-* cl_khr_mipmap_image extension
-/* cl_sampler_properties */
-#define CL_SAMPLER_MIP_FILTER_MODE_KHR              0x1155
-#define CL_SAMPLER_LOD_MIN_KHR                      0x1156
-#define CL_SAMPLER_LOD_MAX_KHR                      0x1157
-* cl_khr_priority_hints extension
-/* This extension define is for backwards compatibility.
-   It shouldn't be required since this extension has no new functions. */
-#define cl_khr_priority_hints 1
-typedef cl_uint  cl_queue_priority_khr;
-/* cl_command_queue_properties */
-#define CL_QUEUE_PRIORITY_KHR 0x1096
-/* cl_queue_priority_khr */
-* cl_khr_throttle_hints extension
-/* This extension define is for backwards compatibility.
-   It shouldn't be required since this extension has no new functions. */
-#define cl_khr_throttle_hints 1
-typedef cl_uint  cl_queue_throttle_khr;
-/* cl_command_queue_properties */
-#define CL_QUEUE_THROTTLE_KHR 0x1097
-/* cl_queue_throttle_khr */
-* cl_khr_subgroup_named_barrier
-/* This extension define is for backwards compatibility.
-   It shouldn't be required since this extension has no new functions. */
-#define cl_khr_subgroup_named_barrier 1
-/* cl_device_info */
-* cl_khr_extended_versioning
-#define cl_khr_extended_versioning 1
-#define CL_VERSION_PATCH_KHR(version) ((version) & CL_VERSION_PATCH_MASK_KHR)
-#define CL_MAKE_VERSION_KHR(major, minor, patch) \
-    ((patch) & CL_VERSION_PATCH_MASK_KHR))
-typedef cl_uint cl_version_khr;
-typedef struct _cl_name_version_khr
-    cl_version_khr version;
-} cl_name_version_khr;
-/* cl_platform_info */
-#define CL_PLATFORM_NUMERIC_VERSION_KHR                  0x0906
-/* cl_device_info */
-#define CL_DEVICE_NUMERIC_VERSION_KHR                    0x105E
-#define CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR            0x1060
-#define CL_DEVICE_ILS_WITH_VERSION_KHR                   0x1061
-* cl_khr_device_uuid extension
-#define cl_khr_device_uuid 1
-#define CL_UUID_SIZE_KHR 16
-#define CL_LUID_SIZE_KHR 8
-#define CL_DEVICE_UUID_KHR          0x106A
-#define CL_DRIVER_UUID_KHR          0x106B
-#define CL_DEVICE_LUID_VALID_KHR    0x106C
-#define CL_DEVICE_LUID_KHR          0x106D
-#define CL_DEVICE_NODE_MASK_KHR     0x106E
-* cl_khr_pci_bus_info
-#define cl_khr_pci_bus_info 1
-typedef struct _cl_device_pci_bus_info_khr {
-    cl_uint pci_domain;
-    cl_uint pci_bus;
-    cl_uint pci_device;
-    cl_uint pci_function;
-} cl_device_pci_bus_info_khr;
-/* cl_device_info */
-#define CL_DEVICE_PCI_BUS_INFO_KHR                          0x410F
-* cl_khr_suggested_local_work_size
-#define cl_khr_suggested_local_work_size 1
-extern CL_API_ENTRY cl_int CL_API_CALL
-    cl_command_queue command_queue,
-    cl_kernel kernel,
-    cl_uint work_dim,
-    const size_t* global_work_offset,
-    const size_t* global_work_size,
-    size_t* suggested_local_work_size) CL_API_SUFFIX__VERSION_3_0;
-typedef cl_int (CL_API_CALL *
-    cl_command_queue command_queue,
-    cl_kernel kernel,
-    cl_uint work_dim,
-    const size_t* global_work_offset,
-    const size_t* global_work_size,
-    size_t* suggested_local_work_size) CL_API_SUFFIX__VERSION_3_0;
- * cl_arm_import_memory extension *
- **********************************/
-#define cl_arm_import_memory 1
-typedef intptr_t cl_import_properties_arm;
-/* Default and valid proporties name for cl_arm_import_memory */
-#define CL_IMPORT_TYPE_ARM                        0x40B2
-/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */
-#define CL_IMPORT_TYPE_HOST_ARM                   0x40B3
-/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
-#define CL_IMPORT_TYPE_DMA_BUF_ARM                0x40B4
-/* Protected memory property */
-#define CL_IMPORT_TYPE_PROTECTED_ARM              0x40B5
-/* Android hardware buffer type value for CL_IMPORT_TYPE_ARM property */
-/* Data consistency with host property */
-/* Index of plane in a multiplanar hardware buffer */
-/* Index of layer in a multilayer hardware buffer */
-/* Import memory size value to indicate a size for the whole buffer */
-/* This extension adds a new function that allows for direct memory import into
- * OpenCL via the clImportMemoryARM function.
- *
- * Memory imported through this interface will be mapped into the device's page
- * tables directly, providing zero copy access. It will never fall back to copy
- * operations and aliased buffers.
- *
- * Types of memory supported for import are specified as additional extension
- * strings.
- *
- * This extension produces cl_mem allocations which are compatible with all other
- * users of cl_mem in the standard API.
- *
- * This extension maps pages with the same properties as the normal buffer creation
- * function clCreateBuffer.
- */
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clImportMemoryARM( cl_context context,
-                   cl_mem_flags flags,
-                   const cl_import_properties_arm *properties,
-                   void *memory,
-                   size_t size,
-                   cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0;
- * cl_arm_shared_virtual_memory extension *
- ******************************************/
-#define cl_arm_shared_virtual_memory 1
-/* Used by clGetDeviceInfo */
-#define CL_DEVICE_SVM_CAPABILITIES_ARM                  0x40B6
-/* Used by clGetMemObjectInfo */
-#define CL_MEM_USES_SVM_POINTER_ARM                     0x40B7
-/* Used by clSetKernelExecInfoARM: */
-#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM                0x40B8
-/* To be used by clGetEventInfo: */
-#define CL_COMMAND_SVM_FREE_ARM                         0x40BA
-#define CL_COMMAND_SVM_MEMCPY_ARM                       0x40BB
-#define CL_COMMAND_SVM_MEMFILL_ARM                      0x40BC
-#define CL_COMMAND_SVM_MAP_ARM                          0x40BD
-#define CL_COMMAND_SVM_UNMAP_ARM                        0x40BE
-/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. */
-#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM           (1 << 0)
-#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM             (1 << 1)
-#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM             (1 << 2)
-#define CL_DEVICE_SVM_ATOMICS_ARM                       (1 << 3)
-/* Flag values used by clSVMAllocARM: */
-#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM                (1 << 10)
-#define CL_MEM_SVM_ATOMICS_ARM                          (1 << 11)
-typedef cl_bitfield cl_svm_mem_flags_arm;
-typedef cl_uint     cl_kernel_exec_info_arm;
-typedef cl_bitfield cl_device_svm_capabilities_arm;
-extern CL_API_ENTRY void * CL_API_CALL
-clSVMAllocARM(cl_context       context,
-              cl_svm_mem_flags_arm flags,
-              size_t           size,
-              cl_uint          alignment) CL_API_SUFFIX__VERSION_1_2;
-clSVMFreeARM(cl_context        context,
-             void *            svm_pointer) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMFreeARM(cl_command_queue  command_queue,
-                    cl_uint           num_svm_pointers,
-                    void *            svm_pointers[],
-                    void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue,
-                                                       cl_uint          num_svm_pointers,
-                                                       void *           svm_pointers[],
-                                                       void *           user_data),
-                    void *            user_data,
-                    cl_uint           num_events_in_wait_list,
-                    const cl_event *  event_wait_list,
-                    cl_event *        event) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMMemcpyARM(cl_command_queue  command_queue,
-                      cl_bool           blocking_copy,
-                      void *            dst_ptr,
-                      const void *      src_ptr,
-                      size_t            size,
-                      cl_uint           num_events_in_wait_list,
-                      const cl_event *  event_wait_list,
-                      cl_event *        event) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMMemFillARM(cl_command_queue  command_queue,
-                       void *            svm_ptr,
-                       const void *      pattern,
-                       size_t            pattern_size,
-                       size_t            size,
-                       cl_uint           num_events_in_wait_list,
-                       const cl_event *  event_wait_list,
-                       cl_event *        event) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMMapARM(cl_command_queue  command_queue,
-                   cl_bool           blocking_map,
-                   cl_map_flags      flags,
-                   void *            svm_ptr,
-                   size_t            size,
-                   cl_uint           num_events_in_wait_list,
-                   const cl_event *  event_wait_list,
-                   cl_event *        event) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueSVMUnmapARM(cl_command_queue  command_queue,
-                     void *            svm_ptr,
-                     cl_uint           num_events_in_wait_list,
-                     const cl_event *  event_wait_list,
-                     cl_event *        event) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetKernelArgSVMPointerARM(cl_kernel    kernel,
-                            cl_uint      arg_index,
-                            const void * arg_value) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clSetKernelExecInfoARM(cl_kernel            kernel,
-                       cl_kernel_exec_info_arm  param_name,
-                       size_t               param_value_size,
-                       const void *         param_value) CL_API_SUFFIX__VERSION_1_2;
- * cl_arm_get_core_id extension *
- ********************************/
-#ifdef CL_VERSION_1_2
-#define cl_arm_get_core_id 1
-/* Device info property for bitfield of cores present */
-#endif  /* CL_VERSION_1_2 */
-* cl_arm_job_slot_selection
-#define cl_arm_job_slot_selection 1
-/* cl_device_info */
-#define CL_DEVICE_JOB_SLOTS_ARM                   0x41E0
-/* cl_command_queue_properties */
-#define CL_QUEUE_JOB_SLOT_ARM                     0x41E1
-* cl_arm_scheduling_controls
-#define cl_arm_scheduling_controls 1
-typedef cl_bitfield cl_device_scheduling_controls_capabilities_arm;
-/* cl_device_info */
-#define CL_DEVICE_SCHEDULING_KERNEL_BATCHING_ARM               (1 << 0)
-#define CL_DEVICE_SCHEDULING_DEFERRED_FLUSH_ARM                (1 << 3)
-/* cl_kernel_info */
-/* cl_queue_properties */
-#define CL_QUEUE_KERNEL_BATCHING_ARM                            0x41E7
-#define CL_QUEUE_DEFERRED_FLUSH_ARM                             0x41EC
-* cl_arm_controlled_kernel_termination
-#define cl_arm_controlled_kernel_termination 1
-/* Error code to indicate kernel terminated with failure */
-/* cl_device_info */
-/* Bit fields for controlled termination feature query */
-typedef cl_bitfield cl_device_controlled_termination_capabilities_arm;
-/* cl_event_info */
-/* Values returned for event termination reason query */
-typedef cl_uint cl_command_termination_reason_arm;
-* cl_intel_thread_local_exec extension *
-#define cl_intel_thread_local_exec 1
-#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL      (((cl_bitfield)1) << 31)
-* cl_intel_device_partition_by_names extension *
-#define cl_intel_device_partition_by_names 1
-#define CL_DEVICE_PARTITION_BY_NAMES_INTEL          0x4052
-* cl_intel_accelerator extension                *
-* cl_intel_motion_estimation extension          *
-* cl_intel_advanced_motion_estimation extension *
-#define cl_intel_accelerator 1
-#define cl_intel_motion_estimation 1
-#define cl_intel_advanced_motion_estimation 1
-typedef struct _cl_accelerator_intel* cl_accelerator_intel;
-typedef cl_uint cl_accelerator_type_intel;
-typedef cl_uint cl_accelerator_info_intel;
-typedef struct _cl_motion_estimation_desc_intel {
-    cl_uint mb_block_type;
-    cl_uint subpixel_mode;
-    cl_uint sad_adjust_mode;
-    cl_uint search_path_type;
-} cl_motion_estimation_desc_intel;
-/* error codes */
-#define CL_INVALID_ACCELERATOR_INTEL                              -1094
-#define CL_INVALID_ACCELERATOR_TYPE_INTEL                         -1095
-#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL                   -1096
-#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL                   -1097
-/* cl_accelerator_type_intel */
-/* cl_accelerator_info_intel */
-#define CL_ACCELERATOR_DESCRIPTOR_INTEL                           0x4090
-#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL                      0x4091
-#define CL_ACCELERATOR_CONTEXT_INTEL                              0x4092
-#define CL_ACCELERATOR_TYPE_INTEL                                 0x4093
-/* cl_motion_detect_desc_intel flags */
-#define CL_ME_MB_TYPE_16x16_INTEL                                 0x0
-#define CL_ME_MB_TYPE_8x8_INTEL                                   0x1
-#define CL_ME_MB_TYPE_4x4_INTEL                                   0x2
-#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL                         0x0
-#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL                            0x1
-#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL                            0x2
-#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL                          0x0
-#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL                          0x1
-#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL                        0x0
-#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL                        0x1
-#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL                      0x5
-#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL                         0x0
-#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL                  0x1
-#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL                    0x2
-#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL                           0x4
-#define CL_ME_FORWARD_INPUT_MODE_INTEL                            0x1
-#define CL_ME_BACKWARD_INPUT_MODE_INTEL                           0x2
-#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL                        0x3
-#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL                          16
-#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL                            21
-#define CL_ME_BIDIR_WEIGHT_HALF_INTEL                             32
-#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL                        43
-#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL                    48
-#define CL_ME_COST_PENALTY_NONE_INTEL                             0x0
-#define CL_ME_COST_PENALTY_LOW_INTEL                              0x1
-#define CL_ME_COST_PENALTY_NORMAL_INTEL                           0x2
-#define CL_ME_COST_PENALTY_HIGH_INTEL                             0x3
-#define CL_ME_COST_PRECISION_QPEL_INTEL                           0x0
-#define CL_ME_COST_PRECISION_HPEL_INTEL                           0x1
-#define CL_ME_COST_PRECISION_PEL_INTEL                            0x2
-#define CL_ME_COST_PRECISION_DPEL_INTEL                           0x3
-#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL                  0x0
-#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL                        0x2
-#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL                     0x4
-#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL                      0x0
-#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL                   0x3
-/* cl_device_info */
-#define CL_DEVICE_ME_VERSION_INTEL                                0x407E
-#define CL_ME_VERSION_LEGACY_INTEL                                0x0
-#define CL_ME_VERSION_ADVANCED_VER_1_INTEL                        0x1
-#define CL_ME_VERSION_ADVANCED_VER_2_INTEL                        0x2
-extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
-    cl_context                   context,
-    cl_accelerator_type_intel    accelerator_type,
-    size_t                       descriptor_size,
-    const void*                  descriptor,
-    cl_int*                      errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)(
-    cl_context                   context,
-    cl_accelerator_type_intel    accelerator_type,
-    size_t                       descriptor_size,
-    const void*                  descriptor,
-    cl_int*                      errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-    cl_accelerator_intel         accelerator,
-    cl_accelerator_info_intel    param_name,
-    size_t                       param_value_size,
-    void*                        param_value,
-    size_t*                      param_value_size_ret) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)(
-    cl_accelerator_intel         accelerator,
-    cl_accelerator_info_intel    param_name,
-    size_t                       param_value_size,
-    void*                        param_value,
-    size_t*                      param_value_size_ret) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-    cl_accelerator_intel         accelerator) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)(
-    cl_accelerator_intel         accelerator) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_int CL_API_CALL
-    cl_accelerator_intel         accelerator) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)(
-    cl_accelerator_intel         accelerator) CL_API_SUFFIX__VERSION_1_2;
-* cl_intel_simultaneous_sharing extension *
-#define cl_intel_simultaneous_sharing 1
-* cl_intel_egl_image_yuv extension *
-#define cl_intel_egl_image_yuv 1
-#define CL_EGL_YUV_PLANE_INTEL                           0x4107
-* cl_intel_packed_yuv extension *
-#define cl_intel_packed_yuv 1
-#define CL_YUYV_INTEL                                    0x4076
-#define CL_UYVY_INTEL                                    0x4077
-#define CL_YVYU_INTEL                                    0x4078
-#define CL_VYUY_INTEL                                    0x4079
-* cl_intel_required_subgroup_size extension *
-#define cl_intel_required_subgroup_size 1
-#define CL_DEVICE_SUB_GROUP_SIZES_INTEL                  0x4108
-#define CL_KERNEL_SPILL_MEM_SIZE_INTEL                   0x4109
-* cl_intel_driver_diagnostics extension *
-#define cl_intel_driver_diagnostics 1
-typedef cl_uint cl_diagnostics_verbose_level;
-#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL                0x4106
-#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL           ( 0xff )
-#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL           ( 1 << 1 )
-* cl_intel_planar_yuv extension *
-#define CL_NV12_INTEL                                       0x410E
-#define CL_MEM_NO_ACCESS_INTEL                              ( 1 << 24 )
-#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL              ( 1 << 25 )
-#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL                0x417E
-#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL               0x417F
-* cl_intel_device_side_avc_motion_estimation extension *
-#define CL_DEVICE_AVC_ME_VERSION_INTEL                      0x410B
-#define CL_AVC_ME_VERSION_0_INTEL                           0x0   /* No support. */
-#define CL_AVC_ME_VERSION_1_INTEL                           0x1   /* First supported version. */
-#define CL_AVC_ME_MAJOR_16x16_INTEL                         0x0
-#define CL_AVC_ME_MAJOR_16x8_INTEL                          0x1
-#define CL_AVC_ME_MAJOR_8x16_INTEL                          0x2
-#define CL_AVC_ME_MAJOR_8x8_INTEL                           0x3
-#define CL_AVC_ME_MINOR_8x8_INTEL                           0x0
-#define CL_AVC_ME_MINOR_8x4_INTEL                           0x1
-#define CL_AVC_ME_MINOR_4x8_INTEL                           0x2
-#define CL_AVC_ME_MINOR_4x4_INTEL                           0x3
-#define CL_AVC_ME_MAJOR_FORWARD_INTEL                       0x0
-#define CL_AVC_ME_MAJOR_BACKWARD_INTEL                      0x1
-#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL                 0x2
-#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL                  0x0
-#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL                0x7E
-#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL                 0x7D
-#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL                 0x7B
-#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL                  0x77
-#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL                  0x6F
-#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL                  0x5F
-#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL                  0x3F
-#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL                 0x1
-#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL                  0x2
-#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL               0x4
-#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL             0x6
-#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL             0x7
-#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL                0x8
-#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL          0x9
-#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL            0x2
-#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL            0xa
-#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL                0x0
-#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL                0x2
-#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL               0x0
-#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL                  0x1
-#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL                  0x3
-#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL                 0x0
-#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL                 0x1
-#define CL_AVC_ME_COST_PRECISION_PEL_INTEL                  0x2
-#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL                 0x3
-#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL                0x10
-#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL                  0x15
-#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL                   0x20
-#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL              0x2B
-#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL                 0x0
-#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL                0x2
-#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL                  0x4
-#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL               0x8
-#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL          0x0
-#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL            0x4000
-#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL     ( 0x1 << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL    ( 0x2 << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL        ( 0x3 << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL       ( 0x55 << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL      ( 0xAA << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL          ( 0xFF << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL     ( 0x1 << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL    ( 0x2 << 24 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL     ( 0x1 << 26 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL    ( 0x2 << 26 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL     ( 0x1 << 28 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL    ( 0x2 << 28 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL     ( 0x1 << 30 )
-#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL    ( 0x2 << 30 )
-#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL                0x00
-#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL                0x80
-#define CL_AVC_ME_INTRA_16x16_INTEL                         0x0
-#define CL_AVC_ME_INTRA_8x8_INTEL                           0x1
-#define CL_AVC_ME_INTRA_4x4_INTEL                           0x2
-#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL                  0x2
-#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL               0x4
-#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL                0x0
-#define CL_AVC_ME_FRAME_FORWARD_INTEL                       0x1
-#define CL_AVC_ME_FRAME_BACKWARD_INTEL                      0x2
-#define CL_AVC_ME_FRAME_DUAL_INTEL                          0x3
-#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL                     0x0
-#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL                    0x1
-#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL                    0x2
-* cl_intel_unified_shared_memory extension *
-/* These APIs are in sync with Revision Q of the cl_intel_unified_shared_memory spec! */
-#define cl_intel_unified_shared_memory 1
-/* cl_device_info */
-#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL                   0x4190
-#define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL                 0x4191
-typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel;
-/* cl_device_unified_shared_memory_capabilities_intel - bitfield */
-#define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL                   (1 << 0)
-typedef cl_properties cl_mem_properties_intel;
-/* cl_mem_properties_intel */
-#define CL_MEM_ALLOC_FLAGS_INTEL        0x4195
-typedef cl_bitfield cl_mem_alloc_flags_intel;
-/* cl_mem_alloc_flags_intel - bitfield */
-#define CL_MEM_ALLOC_WRITE_COMBINED_INTEL               (1 << 0)
-typedef cl_uint cl_mem_info_intel;
-/* cl_mem_alloc_info_intel */
-#define CL_MEM_ALLOC_TYPE_INTEL         0x419A
-#define CL_MEM_ALLOC_BASE_PTR_INTEL     0x419B
-#define CL_MEM_ALLOC_SIZE_INTEL         0x419C
-#define CL_MEM_ALLOC_DEVICE_INTEL       0x419D
-/* Enum values 0x419E-0x419F are reserved for future queries. */
-typedef cl_uint cl_unified_shared_memory_type_intel;
-/* cl_unified_shared_memory_type_intel */
-#define CL_MEM_TYPE_UNKNOWN_INTEL       0x4196
-#define CL_MEM_TYPE_HOST_INTEL          0x4197
-#define CL_MEM_TYPE_DEVICE_INTEL        0x4198
-#define CL_MEM_TYPE_SHARED_INTEL        0x4199
-typedef cl_uint cl_mem_advice_intel;
-/* cl_mem_advice_intel */
-/* Enum values 0x4208-0x420F are reserved for future memory advices. */
-/* cl_kernel_exec_info */
-#define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL                  0x4203
-/* cl_command_type */
-#define CL_COMMAND_MEMFILL_INTEL        0x4204
-#define CL_COMMAND_MEMCPY_INTEL         0x4205
-#define CL_COMMAND_MEMADVISE_INTEL      0x4207
-extern CL_API_ENTRY void* CL_API_CALL
-            cl_context context,
-            const cl_mem_properties_intel* properties,
-            size_t size,
-            cl_uint alignment,
-            cl_int* errcode_ret);
-typedef void* (CL_API_CALL *
-            cl_context context,
-            const cl_mem_properties_intel* properties,
-            size_t size,
-            cl_uint alignment,
-            cl_int* errcode_ret);
-extern CL_API_ENTRY void* CL_API_CALL
-            cl_context context,
-            cl_device_id device,
-            const cl_mem_properties_intel* properties,
-            size_t size,
-            cl_uint alignment,
-            cl_int* errcode_ret);
-typedef void* (CL_API_CALL *
-            cl_context context,
-            cl_device_id device,
-            const cl_mem_properties_intel* properties,
-            size_t size,
-            cl_uint alignment,
-            cl_int* errcode_ret);
-extern CL_API_ENTRY void* CL_API_CALL
-            cl_context context,
-            cl_device_id device,
-            const cl_mem_properties_intel* properties,
-            size_t size,
-            cl_uint alignment,
-            cl_int* errcode_ret);
-typedef void* (CL_API_CALL *
-            cl_context context,
-            cl_device_id device,
-            const cl_mem_properties_intel* properties,
-            size_t size,
-            cl_uint alignment,
-            cl_int* errcode_ret);
-extern CL_API_ENTRY cl_int CL_API_CALL
-            cl_context context,
-            void* ptr);
-typedef cl_int (CL_API_CALL *
-            cl_context context,
-            void* ptr);
-extern CL_API_ENTRY cl_int CL_API_CALL
-            cl_context context,
-            void* ptr);
-typedef cl_int (CL_API_CALL *
-            cl_context context,
-            void* ptr);
-extern CL_API_ENTRY cl_int CL_API_CALL
-            cl_context context,
-            const void* ptr,
-            cl_mem_info_intel param_name,
-            size_t param_value_size,
-            void* param_value,
-            size_t* param_value_size_ret);
-typedef cl_int (CL_API_CALL *
-            cl_context context,
-            const void* ptr,
-            cl_mem_info_intel param_name,
-            size_t param_value_size,
-            void* param_value,
-            size_t* param_value_size_ret);
-extern CL_API_ENTRY cl_int CL_API_CALL
-            cl_kernel kernel,
-            cl_uint arg_index,
-            const void* arg_value);
-typedef cl_int (CL_API_CALL *
-            cl_kernel kernel,
-            cl_uint arg_index,
-            const void* arg_value);
-extern CL_API_ENTRY cl_int CL_API_CALL
-            cl_command_queue command_queue,
-            void* dst_ptr,
-            cl_int value,
-            size_t size,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-typedef cl_int (CL_API_CALL *
-            cl_command_queue command_queue,
-            void* dst_ptr,
-            cl_int value,
-            size_t size,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-extern CL_API_ENTRY cl_int CL_API_CALL
-            cl_command_queue command_queue,
-            void* dst_ptr,
-            const void* pattern,
-            size_t pattern_size,
-            size_t size,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-typedef cl_int (CL_API_CALL *
-            cl_command_queue command_queue,
-            void* dst_ptr,
-            const void* pattern,
-            size_t pattern_size,
-            size_t size,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-extern CL_API_ENTRY cl_int CL_API_CALL
-            cl_command_queue command_queue,
-            cl_bool blocking,
-            void* dst_ptr,
-            const void* src_ptr,
-            size_t size,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-typedef cl_int (CL_API_CALL *
-            cl_command_queue command_queue,
-            cl_bool blocking,
-            void* dst_ptr,
-            const void* src_ptr,
-            size_t size,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-#ifdef CL_VERSION_1_2
-/* Because these APIs use cl_mem_migration_flags, they require
-   OpenCL 1.2: */
-extern CL_API_ENTRY cl_int CL_API_CALL
-            cl_command_queue command_queue,
-            const void* ptr,
-            size_t size,
-            cl_mem_migration_flags flags,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-typedef cl_int (CL_API_CALL *
-            cl_command_queue command_queue,
-            const void* ptr,
-            size_t size,
-            cl_mem_migration_flags flags,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-extern CL_API_ENTRY cl_int CL_API_CALL
-            cl_command_queue command_queue,
-            const void* ptr,
-            size_t size,
-            cl_mem_advice_intel advice,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-typedef cl_int (CL_API_CALL *
-            cl_command_queue command_queue,
-            const void* ptr,
-            size_t size,
-            cl_mem_advice_intel advice,
-            cl_uint num_events_in_wait_list,
-            const cl_event* event_wait_list,
-            cl_event* event);
-* cl_intel_create_buffer_with_properties extension *
-#define cl_intel_create_buffer_with_properties 1
-extern CL_API_ENTRY cl_mem CL_API_CALL
-    cl_context   context,
-    const cl_mem_properties_intel* properties,
-    cl_mem_flags flags,
-    size_t       size,
-    void *       host_ptr,
-    cl_int *     errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_mem (CL_API_CALL *
-    cl_context   context,
-    const cl_mem_properties_intel* properties,
-    cl_mem_flags flags,
-    size_t       size,
-    void *       host_ptr,
-    cl_int *     errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-* cl_intel_mem_channel_property extension *
-#define CL_MEM_CHANNEL_INTEL            0x4213
-* cl_intel_mem_force_host_memory *
-#define cl_intel_mem_force_host_memory 1
-/* cl_mem_flags */
-#define CL_MEM_FORCE_HOST_MEMORY_INTEL                      (1 << 20)
-* cl_intel_command_queue_families
-#define cl_intel_command_queue_families 1
-typedef cl_bitfield         cl_command_queue_capabilities_intel;
-#define CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL                 64
-typedef struct _cl_queue_family_properties_intel {
-    cl_command_queue_properties properties;
-    cl_command_queue_capabilities_intel capabilities;
-    cl_uint count;
-} cl_queue_family_properties_intel;
-/* cl_device_info */
-/* cl_queue_properties */
-#define CL_QUEUE_FAMILY_INTEL                               0x418C
-#define CL_QUEUE_INDEX_INTEL                                0x418D
-/* cl_command_queue_capabilities_intel */
-#define CL_QUEUE_DEFAULT_CAPABILITIES_INTEL                 0
-#define CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL                (1 << 10)
-#define CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL               (1 << 11)
-#define CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL            (1 << 12)
-#define CL_QUEUE_CAPABILITY_MAP_IMAGE_INTEL                 (1 << 13)
-#define CL_QUEUE_CAPABILITY_FILL_IMAGE_INTEL                (1 << 14)
-#define CL_QUEUE_CAPABILITY_MARKER_INTEL                    (1 << 24)
-#define CL_QUEUE_CAPABILITY_BARRIER_INTEL                   (1 << 25)
-#define CL_QUEUE_CAPABILITY_KERNEL_INTEL                    (1 << 26)
-#ifdef __cplusplus
-#endif /* __CL_EXT_H */
diff --git a/dependencies/ocl-headers/CL/cl_gl.h b/dependencies/ocl-headers/CL/cl_gl.h
deleted file mode 100644
index 5ea0fd8..0000000
--- a/dependencies/ocl-headers/CL/cl_gl.h
+++ /dev/null
@@ -1,169 +0,0 @@
- * Copyright (c) 2008-2021 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-#ifndef __OPENCL_CL_GL_H
-#define __OPENCL_CL_GL_H
-#include <CL/cl.h>
-#ifdef __cplusplus
-extern "C" {
-typedef cl_uint     cl_gl_object_type;
-typedef cl_uint     cl_gl_texture_info;
-typedef cl_uint     cl_gl_platform_info;
-typedef struct __GLsync *cl_GLsync;
-/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken           */
-#define CL_GL_OBJECT_BUFFER                     0x2000
-#define CL_GL_OBJECT_TEXTURE2D                  0x2001
-#define CL_GL_OBJECT_TEXTURE3D                  0x2002
-#define CL_GL_OBJECT_RENDERBUFFER               0x2003
-#ifdef CL_VERSION_1_2
-#define CL_GL_OBJECT_TEXTURE2D_ARRAY            0x200E
-#define CL_GL_OBJECT_TEXTURE1D                  0x200F
-#define CL_GL_OBJECT_TEXTURE1D_ARRAY            0x2010
-#define CL_GL_OBJECT_TEXTURE_BUFFER             0x2011
-/* cl_gl_texture_info           */
-#define CL_GL_TEXTURE_TARGET                    0x2004
-#define CL_GL_MIPMAP_LEVEL                      0x2005
-#ifdef CL_VERSION_1_2
-#define CL_GL_NUM_SAMPLES                       0x2012
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromGLBuffer(cl_context     context,
-                     cl_mem_flags   flags,
-                     cl_GLuint      bufobj,
-                     cl_int *       errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-#ifdef CL_VERSION_1_2
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromGLTexture(cl_context      context,
-                      cl_mem_flags    flags,
-                      cl_GLenum       target,
-                      cl_GLint        miplevel,
-                      cl_GLuint       texture,
-                      cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_2;
-extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromGLRenderbuffer(cl_context   context,
-                           cl_mem_flags flags,
-                           cl_GLuint    renderbuffer,
-                           cl_int *     errcode_ret) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetGLObjectInfo(cl_mem                memobj,
-                  cl_gl_object_type *   gl_object_type,
-                  cl_GLuint *           gl_object_name) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetGLTextureInfo(cl_mem               memobj,
-                   cl_gl_texture_info   param_name,
-                   size_t               param_value_size,
-                   void *               param_value,
-                   size_t *             param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueAcquireGLObjects(cl_command_queue      command_queue,
-                          cl_uint               num_objects,
-                          const cl_mem *        mem_objects,
-                          cl_uint               num_events_in_wait_list,
-                          const cl_event *      event_wait_list,
-                          cl_event *            event) CL_API_SUFFIX__VERSION_1_0;
-extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueReleaseGLObjects(cl_command_queue      command_queue,
-                          cl_uint               num_objects,
-                          const cl_mem *        mem_objects,
-                          cl_uint               num_events_in_wait_list,
-                          const cl_event *      event_wait_list,
-                          cl_event *            event) CL_API_SUFFIX__VERSION_1_0;
-/* Deprecated OpenCL 1.1 APIs */
-clCreateFromGLTexture2D(cl_context      context,
-                        cl_mem_flags    flags,
-                        cl_GLenum       target,
-                        cl_GLint        miplevel,
-                        cl_GLuint       texture,
-                        cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-clCreateFromGLTexture3D(cl_context      context,
-                        cl_mem_flags    flags,
-                        cl_GLenum       target,
-                        cl_GLint        miplevel,
-                        cl_GLuint       texture,
-                        cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
-/* cl_khr_gl_sharing extension  */
-#define cl_khr_gl_sharing 1
-typedef cl_uint     cl_gl_context_info;
-/* Additional Error Codes  */
-/* cl_gl_context_info  */
-#define CL_DEVICES_FOR_GL_CONTEXT_KHR           0x2007
-/* Additional cl_context_properties  */
-#define CL_GL_CONTEXT_KHR                       0x2008
-#define CL_EGL_DISPLAY_KHR                      0x2009
-#define CL_GLX_DISPLAY_KHR                      0x200A
-#define CL_WGL_HDC_KHR                          0x200B
-#define CL_CGL_SHAREGROUP_KHR                   0x200C
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetGLContextInfoKHR(const cl_context_properties * properties,
-                      cl_gl_context_info            param_name,
-                      size_t                        param_value_size,
-                      void *                        param_value,
-                      size_t *                      param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
-    const cl_context_properties * properties,
-    cl_gl_context_info            param_name,
-    size_t                        param_value_size,
-    void *                        param_value,
-    size_t *                      param_value_size_ret);
- *  cl_khr_gl_event extension
- */
-extern CL_API_ENTRY cl_event CL_API_CALL
-clCreateEventFromGLsyncKHR(cl_context context,
-                           cl_GLsync  sync,
-                           cl_int *   errcode_ret) CL_API_SUFFIX__VERSION_1_1;
-#ifdef __cplusplus
-#endif  /* __OPENCL_CL_GL_H */
diff --git a/dependencies/ocl-headers/CL/cl_gl_ext.h b/dependencies/ocl-headers/CL/cl_gl_ext.h
deleted file mode 100644
index 8ec8181..0000000
--- a/dependencies/ocl-headers/CL/cl_gl_ext.h
+++ /dev/null
@@ -1,18 +0,0 @@
- * Copyright (c) 2008-2021 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-#include <CL/cl_gl.h>
-#pragma message("All OpenGL-related extensions have been moved into cl_gl.h.  Please include cl_gl.h directly.")
diff --git a/dependencies/ocl-headers/CL/cl_half.h b/dependencies/ocl-headers/CL/cl_half.h
deleted file mode 100644
index ecc4223..0000000
--- a/dependencies/ocl-headers/CL/cl_half.h
+++ /dev/null
@@ -1,440 +0,0 @@
- * Copyright (c) 2019-2020 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
- * This is a header-only utility library that provides OpenCL host code with
- * routines for converting to/from cl_half values.
- *
- * Example usage:
- *
- *    #include <CL/cl_half.h>
- *    ...
- *    cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE);
- *    cl_float f = cl_half_to_float(h);
- */
-#include <CL/cl_platform.h>
-#include <stdint.h>
-#ifdef __cplusplus
-extern "C" {
- * Rounding mode used when converting to cl_half.
- */
-typedef enum
-  CL_HALF_RTE, // round to nearest even
-  CL_HALF_RTZ, // round towards zero
-  CL_HALF_RTP, // round towards positive infinity
-  CL_HALF_RTN, // round towards negative infinity
-} cl_half_rounding_mode;
-/* Private utility macros. */
-#define CL_HALF_EXP_MASK 0x7C00
- * Utility to deal with values that overflow when converting to half precision.
- */
-static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,
-                                              uint16_t sign)
-  if (rounding_mode == CL_HALF_RTZ)
-  {
-    // Round overflow towards zero -> largest finite number (preserving sign)
-    return (sign << 15) | CL_HALF_MAX_FINITE_MAG;
-  }
-  else if (rounding_mode == CL_HALF_RTP && sign)
-  {
-    // Round negative overflow towards positive infinity -> most negative finite number
-    return (1 << 15) | CL_HALF_MAX_FINITE_MAG;
-  }
-  else if (rounding_mode == CL_HALF_RTN && !sign)
-  {
-    // Round positive overflow towards negative infinity -> largest finite number
-  }
-  // Overflow to infinity
-  return (sign << 15) | CL_HALF_EXP_MASK;
- * Utility to deal with values that underflow when converting to half precision.
- */
-static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,
-                                               uint16_t sign)
-  if (rounding_mode == CL_HALF_RTP && !sign)
-  {
-    // Round underflow towards positive infinity -> smallest positive value
-    return (sign << 15) | 1;
-  }
-  else if (rounding_mode == CL_HALF_RTN && sign)
-  {
-    // Round underflow towards negative infinity -> largest negative value
-    return (sign << 15) | 1;
-  }
-  // Flush to zero
-  return (sign << 15);
- * Convert a cl_float to a cl_half.
- */
-static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode)
-  // Type-punning to get direct access to underlying bits
-  union
-  {
-    cl_float f;
-    uint32_t i;
-  } f32;
-  f32.f = f;
-  // Extract sign bit
-  uint16_t sign = f32.i >> 31;
-  // Extract FP32 exponent and mantissa
-  uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF;
-  uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1);
-  // Remove FP32 exponent bias
-  int32_t exp = f_exp - CL_FLT_MAX_EXP + 1;
-  // Add FP16 exponent bias
-  uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
-  // Position of the bit that will become the FP16 mantissa LSB
-  uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
-  // Check for NaN / infinity
-  if (f_exp == 0xFF)
-  {
-    if (f_mant)
-    {
-      // NaN -> propagate mantissa and silence it
-      uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
-      h_mant |= 0x200;
-      return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
-    }
-    else
-    {
-      // Infinity -> zero mantissa
-      return (sign << 15) | CL_HALF_EXP_MASK;
-    }
-  }
-  // Check for zero
-  if (!f_exp && !f_mant)
-  {
-    return (sign << 15);
-  }
-  // Check for overflow
-  if (exp >= CL_HALF_MAX_EXP)
-  {
-    return cl_half_handle_overflow(rounding_mode, sign);
-  }
-  // Check for underflow
-  if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
-  {
-    return cl_half_handle_underflow(rounding_mode, sign);
-  }
-  // Check for value that will become denormal
-  if (exp < -14)
-  {
-    // Denormal -> include the implicit 1 from the FP32 mantissa
-    h_exp = 0;
-    f_mant |= 1 << (CL_FLT_MANT_DIG - 1);
-    // Mantissa shift amount depends on exponent
-    lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);
-  }
-  // Generate FP16 mantissa by shifting FP32 mantissa
-  uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
-  // Check whether we need to round
-  uint32_t halfway = 1 << (lsb_pos - 1);
-  uint32_t mask = (halfway << 1) - 1;
-  switch (rounding_mode)
-  {
-    case CL_HALF_RTE:
-      if ((f_mant & mask) > halfway)
-      {
-        // More than halfway -> round up
-        h_mant += 1;
-      }
-      else if ((f_mant & mask) == halfway)
-      {
-        // Exactly halfway -> round to nearest even
-        if (h_mant & 0x1)
-          h_mant += 1;
-      }
-      break;
-    case CL_HALF_RTZ:
-      // Mantissa has already been truncated -> do nothing
-      break;
-    case CL_HALF_RTP:
-      if ((f_mant & mask) && !sign)
-      {
-        // Round positive numbers up
-        h_mant += 1;
-      }
-      break;
-    case CL_HALF_RTN:
-      if ((f_mant & mask) && sign)
-      {
-        // Round negative numbers down
-        h_mant += 1;
-      }
-      break;
-  }
-  // Check for mantissa overflow
-  if (h_mant & 0x400)
-  {
-    h_exp += 1;
-    h_mant = 0;
-  }
-  return (sign << 15) | (h_exp << 10) | h_mant;
- * Convert a cl_double to a cl_half.
- */
-static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode)
-  // Type-punning to get direct access to underlying bits
-  union
-  {
-    cl_double d;
-    uint64_t i;
-  } f64;
-  f64.d = d;
-  // Extract sign bit
-  uint16_t sign = f64.i >> 63;
-  // Extract FP64 exponent and mantissa
-  uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;
-  uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1);
-  // Remove FP64 exponent bias
-  int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;
-  // Add FP16 exponent bias
-  uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
-  // Position of the bit that will become the FP16 mantissa LSB
-  uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
-  // Check for NaN / infinity
-  if (d_exp == 0x7FF)
-  {
-    if (d_mant)
-    {
-      // NaN -> propagate mantissa and silence it
-      uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
-      h_mant |= 0x200;
-      return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
-    }
-    else
-    {
-      // Infinity -> zero mantissa
-      return (sign << 15) | CL_HALF_EXP_MASK;
-    }
-  }
-  // Check for zero
-  if (!d_exp && !d_mant)
-  {
-    return (sign << 15);
-  }
-  // Check for overflow
-  if (exp >= CL_HALF_MAX_EXP)
-  {
-    return cl_half_handle_overflow(rounding_mode, sign);
-  }
-  // Check for underflow
-  if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
-  {
-    return cl_half_handle_underflow(rounding_mode, sign);
-  }
-  // Check for value that will become denormal
-  if (exp < -14)
-  {
-    // Include the implicit 1 from the FP64 mantissa
-    h_exp = 0;
-    d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);
-    // Mantissa shift amount depends on exponent
-    lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));
-  }
-  // Generate FP16 mantissa by shifting FP64 mantissa
-  uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
-  // Check whether we need to round
-  uint64_t halfway = (uint64_t)1 << (lsb_pos - 1);
-  uint64_t mask = (halfway << 1) - 1;
-  switch (rounding_mode)
-  {
-    case CL_HALF_RTE:
-      if ((d_mant & mask) > halfway)
-      {
-        // More than halfway -> round up
-        h_mant += 1;
-      }
-      else if ((d_mant & mask) == halfway)
-      {
-        // Exactly halfway -> round to nearest even
-        if (h_mant & 0x1)
-          h_mant += 1;
-      }
-      break;
-    case CL_HALF_RTZ:
-      // Mantissa has already been truncated -> do nothing
-      break;
-    case CL_HALF_RTP:
-      if ((d_mant & mask) && !sign)
-      {
-        // Round positive numbers up
-        h_mant += 1;
-      }
-      break;
-    case CL_HALF_RTN:
-      if ((d_mant & mask) && sign)
-      {
-        // Round negative numbers down
-        h_mant += 1;
-      }
-      break;
-  }
-  // Check for mantissa overflow
-  if (h_mant & 0x400)
-  {
-    h_exp += 1;
-    h_mant = 0;
-  }
-  return (sign << 15) | (h_exp << 10) | h_mant;
- * Convert a cl_half to a cl_float.
- */
-static inline cl_float cl_half_to_float(cl_half h)
-  // Type-punning to get direct access to underlying bits
-  union
-  {
-    cl_float f;
-    uint32_t i;
-  } f32;
-  // Extract sign bit
-  uint16_t sign = h >> 15;
-  // Extract FP16 exponent and mantissa
-  uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
-  uint16_t h_mant = h & 0x3FF;
-  // Remove FP16 exponent bias
-  int32_t exp = h_exp - CL_HALF_MAX_EXP + 1;
-  // Add FP32 exponent bias
-  uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1;
-  // Check for NaN / infinity
-  if (h_exp == 0x1F)
-  {
-    if (h_mant)
-    {
-      // NaN -> propagate mantissa and silence it
-      uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
-      f_mant |= 0x400000;
-      f32.i = (sign << 31) | 0x7F800000 | f_mant;
-      return f32.f;
-    }
-    else
-    {
-      // Infinity -> zero mantissa
-      f32.i = (sign << 31) | 0x7F800000;
-      return f32.f;
-    }
-  }
-  // Check for zero / denormal
-  if (h_exp == 0)
-  {
-    if (h_mant == 0)
-    {
-      // Zero -> zero exponent
-      f_exp = 0;
-    }
-    else
-    {
-      // Denormal -> normalize it
-      // - Shift mantissa to make most-significant 1 implicit
-      // - Adjust exponent accordingly
-      uint32_t shift = 0;
-      while ((h_mant & 0x400) == 0)
-      {
-        h_mant <<= 1;
-        shift++;
-      }
-      h_mant &= 0x3FF;
-      f_exp -= shift - 1;
-    }
-  }
-  f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13);
-  return f32.f;
-#ifdef __cplusplus
-#endif  /* OPENCL_CL_HALF_H */
diff --git a/dependencies/ocl-headers/CL/cl_platform.h b/dependencies/ocl-headers/CL/cl_platform.h
deleted file mode 100644
index 8ae655d..0000000
--- a/dependencies/ocl-headers/CL/cl_platform.h
+++ /dev/null
@@ -1,1404 +0,0 @@
- * Copyright (c) 2008-2020 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-#ifndef __CL_PLATFORM_H
-#define __CL_PLATFORM_H
-#include <CL/cl_version.h>
-#ifdef __cplusplus
-extern "C" {
-#if defined(_WIN32)
-    #if !defined(CL_API_ENTRY)
-        #define CL_API_ENTRY
-    #endif
-    #if !defined(CL_API_CALL)
-        #define CL_API_CALL     __stdcall
-    #endif
-    #if !defined(CL_CALLBACK)
-        #define CL_CALLBACK     __stdcall
-    #endif
-    #if !defined(CL_API_ENTRY)
-        #define CL_API_ENTRY
-    #endif
-    #if !defined(CL_API_CALL)
-        #define CL_API_CALL
-    #endif
-    #if !defined(CL_CALLBACK)
-        #define CL_CALLBACK
-    #endif
- * Deprecation flags refer to the last version of the header in which the
- * feature was not deprecated.
- *
- * E.g. VERSION_1_1_DEPRECATED means the feature is present in 1.1 without
- * deprecation but is deprecated in versions later than 1.1.
- */
-#ifdef __GNUC__
-  #define CL_API_SUFFIX_DEPRECATED __attribute__((deprecated))
-#elif defined(_WIN32)
-  #define CL_API_PREFIX_DEPRECATED __declspec(deprecated)
- #endif
-#if (defined (_WIN32) && defined(_MSC_VER))
-/* intptr_t is used in cl.h and provided by stddef.h in Visual C++, but not in clang */
-/* stdint.h was missing before Visual Studio 2010, include it for later versions and for clang */
-#if defined(__clang__) || _MSC_VER >= 1600
-    #include <stdint.h>
-/* scalar types  */
-typedef signed   __int8         cl_char;
-typedef unsigned __int8         cl_uchar;
-typedef signed   __int16        cl_short;
-typedef unsigned __int16        cl_ushort;
-typedef signed   __int32        cl_int;
-typedef unsigned __int32        cl_uint;
-typedef signed   __int64        cl_long;
-typedef unsigned __int64        cl_ulong;
-typedef unsigned __int16        cl_half;
-typedef float                   cl_float;
-typedef double                  cl_double;
-/* Macro names and corresponding values defined by OpenCL */
-#define CL_CHAR_BIT         8
-#define CL_SCHAR_MAX        127
-#define CL_SCHAR_MIN        (-127-1)
-#define CL_CHAR_MAX         CL_SCHAR_MAX
-#define CL_CHAR_MIN         CL_SCHAR_MIN
-#define CL_UCHAR_MAX        255
-#define CL_SHRT_MAX         32767
-#define CL_SHRT_MIN         (-32767-1)
-#define CL_USHRT_MAX        65535
-#define CL_INT_MAX          2147483647
-#define CL_INT_MIN          (-2147483647-1)
-#define CL_UINT_MAX         0xffffffffU
-#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
-#define CL_LONG_MIN         ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
-#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
-#define CL_FLT_DIG          6
-#define CL_FLT_MANT_DIG     24
-#define CL_FLT_MAX_10_EXP   +38
-#define CL_FLT_MAX_EXP      +128
-#define CL_FLT_MIN_10_EXP   -37
-#define CL_FLT_MIN_EXP      -125
-#define CL_FLT_RADIX        2
-#define CL_FLT_MAX          340282346638528859811704183484516925440.0f
-#define CL_FLT_MIN          1.175494350822287507969e-38f
-#define CL_FLT_EPSILON      1.1920928955078125e-7f
-#define CL_HALF_DIG          3
-#define CL_HALF_MANT_DIG     11
-#define CL_HALF_MAX_10_EXP   +4
-#define CL_HALF_MAX_EXP      +16
-#define CL_HALF_MIN_10_EXP   -4
-#define CL_HALF_MIN_EXP      -13
-#define CL_HALF_RADIX        2
-#define CL_HALF_MAX          65504.0f
-#define CL_HALF_MIN          6.103515625e-05f
-#define CL_HALF_EPSILON      9.765625e-04f
-#define CL_DBL_DIG          15
-#define CL_DBL_MANT_DIG     53
-#define CL_DBL_MAX_10_EXP   +308
-#define CL_DBL_MAX_EXP      +1024
-#define CL_DBL_MIN_10_EXP   -307
-#define CL_DBL_MIN_EXP      -1021
-#define CL_DBL_RADIX        2
-#define CL_DBL_MAX          1.7976931348623158e+308
-#define CL_DBL_MIN          2.225073858507201383090e-308
-#define CL_DBL_EPSILON      2.220446049250313080847e-16
-#define CL_M_E              2.7182818284590452354
-#define CL_M_LOG2E          1.4426950408889634074
-#define CL_M_LOG10E         0.43429448190325182765
-#define CL_M_LN2            0.69314718055994530942
-#define CL_M_LN10           2.30258509299404568402
-#define CL_M_PI             3.14159265358979323846
-#define CL_M_PI_2           1.57079632679489661923
-#define CL_M_PI_4           0.78539816339744830962
-#define CL_M_1_PI           0.31830988618379067154
-#define CL_M_2_PI           0.63661977236758134308
-#define CL_M_2_SQRTPI       1.12837916709551257390
-#define CL_M_SQRT2          1.41421356237309504880
-#define CL_M_SQRT1_2        0.70710678118654752440
-#define CL_M_E_F            2.718281828f
-#define CL_M_LOG2E_F        1.442695041f
-#define CL_M_LOG10E_F       0.434294482f
-#define CL_M_LN2_F          0.693147181f
-#define CL_M_LN10_F         2.302585093f
-#define CL_M_PI_F           3.141592654f
-#define CL_M_PI_2_F         1.570796327f
-#define CL_M_PI_4_F         0.785398163f
-#define CL_M_1_PI_F         0.318309886f
-#define CL_M_2_PI_F         0.636619772f
-#define CL_M_2_SQRTPI_F     1.128379167f
-#define CL_M_SQRT2_F        1.414213562f
-#define CL_M_SQRT1_2_F      0.707106781f
-#define CL_NAN              (CL_INFINITY - CL_INFINITY)
-#define CL_HUGE_VALF        ((cl_float) 1e50)
-#define CL_HUGE_VAL         ((cl_double) 1e500)
-#define CL_MAXFLOAT         CL_FLT_MAX
-#define CL_INFINITY         CL_HUGE_VALF
-#include <stdint.h>
-/* scalar types  */
-typedef int8_t          cl_char;
-typedef uint8_t         cl_uchar;
-typedef int16_t         cl_short;
-typedef uint16_t        cl_ushort;
-typedef int32_t         cl_int;
-typedef uint32_t        cl_uint;
-typedef int64_t         cl_long;
-typedef uint64_t        cl_ulong;
-typedef uint16_t        cl_half;
-typedef float           cl_float;
-typedef double          cl_double;
-/* Macro names and corresponding values defined by OpenCL */
-#define CL_CHAR_BIT         8
-#define CL_SCHAR_MAX        127
-#define CL_SCHAR_MIN        (-127-1)
-#define CL_CHAR_MAX         CL_SCHAR_MAX
-#define CL_CHAR_MIN         CL_SCHAR_MIN
-#define CL_UCHAR_MAX        255
-#define CL_SHRT_MAX         32767
-#define CL_SHRT_MIN         (-32767-1)
-#define CL_USHRT_MAX        65535
-#define CL_INT_MAX          2147483647
-#define CL_INT_MIN          (-2147483647-1)
-#define CL_UINT_MAX         0xffffffffU
-#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
-#define CL_LONG_MIN         ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
-#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
-#define CL_FLT_DIG          6
-#define CL_FLT_MANT_DIG     24
-#define CL_FLT_MAX_10_EXP   +38
-#define CL_FLT_MAX_EXP      +128
-#define CL_FLT_MIN_10_EXP   -37
-#define CL_FLT_MIN_EXP      -125
-#define CL_FLT_RADIX        2
-#define CL_FLT_MAX          340282346638528859811704183484516925440.0f
-#define CL_FLT_MIN          1.175494350822287507969e-38f
-#define CL_FLT_EPSILON      1.1920928955078125e-7f
-#define CL_HALF_DIG          3
-#define CL_HALF_MANT_DIG     11
-#define CL_HALF_MAX_10_EXP   +4
-#define CL_HALF_MAX_EXP      +16
-#define CL_HALF_MIN_10_EXP   -4
-#define CL_HALF_MIN_EXP      -13
-#define CL_HALF_RADIX        2
-#define CL_HALF_MAX          65504.0f
-#define CL_HALF_MIN          6.103515625e-05f
-#define CL_HALF_EPSILON      9.765625e-04f
-#define CL_DBL_DIG          15
-#define CL_DBL_MANT_DIG     53
-#define CL_DBL_MAX_10_EXP   +308
-#define CL_DBL_MAX_EXP      +1024
-#define CL_DBL_MIN_10_EXP   -307
-#define CL_DBL_MIN_EXP      -1021
-#define CL_DBL_RADIX        2
-#define CL_DBL_MAX          179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0
-#define CL_DBL_MIN          2.225073858507201383090e-308
-#define CL_DBL_EPSILON      2.220446049250313080847e-16
-#define CL_M_E              2.7182818284590452354
-#define CL_M_LOG2E          1.4426950408889634074
-#define CL_M_LOG10E         0.43429448190325182765
-#define CL_M_LN2            0.69314718055994530942
-#define CL_M_LN10           2.30258509299404568402
-#define CL_M_PI             3.14159265358979323846
-#define CL_M_PI_2           1.57079632679489661923
-#define CL_M_PI_4           0.78539816339744830962
-#define CL_M_1_PI           0.31830988618379067154
-#define CL_M_2_PI           0.63661977236758134308
-#define CL_M_2_SQRTPI       1.12837916709551257390
-#define CL_M_SQRT2          1.41421356237309504880
-#define CL_M_SQRT1_2        0.70710678118654752440
-#define CL_M_E_F            2.718281828f
-#define CL_M_LOG2E_F        1.442695041f
-#define CL_M_LOG10E_F       0.434294482f
-#define CL_M_LN2_F          0.693147181f
-#define CL_M_LN10_F         2.302585093f
-#define CL_M_PI_F           3.141592654f
-#define CL_M_PI_2_F         1.570796327f
-#define CL_M_PI_4_F         0.785398163f
-#define CL_M_1_PI_F         0.318309886f
-#define CL_M_2_PI_F         0.636619772f
-#define CL_M_2_SQRTPI_F     1.128379167f
-#define CL_M_SQRT2_F        1.414213562f
-#define CL_M_SQRT1_2_F      0.707106781f
-#if defined( __GNUC__ )
-   #define CL_HUGE_VALF     __builtin_huge_valf()
-   #define CL_HUGE_VAL      __builtin_huge_val()
-   #define CL_NAN           __builtin_nanf( "" )
-   #define CL_HUGE_VALF     ((cl_float) 1e50)
-   #define CL_HUGE_VAL      ((cl_double) 1e500)
-   float nanf( const char * );
-   #define CL_NAN           nanf( "" )
-#define CL_MAXFLOAT         CL_FLT_MAX
-#define CL_INFINITY         CL_HUGE_VALF
-#include <stddef.h>
-/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */
-typedef unsigned int cl_GLuint;
-typedef int          cl_GLint;
-typedef unsigned int cl_GLenum;
- * Vector types
- *
- *  Note:   OpenCL requires that all types be naturally aligned.
- *          This means that vector types must be naturally aligned.
- *          For example, a vector of four floats must be aligned to
- *          a 16 byte boundary (calculated as 4 * the natural 4-byte
- *          alignment of the float).  The alignment qualifiers here
- *          will only function properly if your compiler supports them
- *          and if you don't actively work to defeat them.  For example,
- *          in order for a cl_float4 to be 16 byte aligned in a struct,
- *          the start of the struct must itself be 16-byte aligned.
- *
- *          Maintaining proper alignment is the user's responsibility.
- */
-/* Define basic vector types */
-#if defined( __VEC__ )
-  #if !defined(__clang__)
-     #include <altivec.h>   /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */
-  #endif
-   typedef __vector unsigned char     __cl_uchar16;
-   typedef __vector signed char       __cl_char16;
-   typedef __vector unsigned short    __cl_ushort8;
-   typedef __vector signed short      __cl_short8;
-   typedef __vector unsigned int      __cl_uint4;
-   typedef __vector signed int        __cl_int4;
-   typedef __vector float             __cl_float4;
-   #define  __CL_UCHAR16__  1
-   #define  __CL_CHAR16__   1
-   #define  __CL_USHORT8__  1
-   #define  __CL_SHORT8__   1
-   #define  __CL_UINT4__    1
-   #define  __CL_INT4__     1
-   #define  __CL_FLOAT4__   1
-#if defined( __SSE__ )
-    #if defined( __MINGW64__ )
-        #include <intrin.h>
-    #else
-        #include <xmmintrin.h>
-    #endif
-    #if defined( __GNUC__ )
-        typedef float __cl_float4   __attribute__((vector_size(16)));
-    #else
-        typedef __m128 __cl_float4;
-    #endif
-    #define __CL_FLOAT4__   1
-#if defined( __SSE2__ )
-    #if defined( __MINGW64__ )
-        #include <intrin.h>
-    #else
-        #include <emmintrin.h>
-    #endif
-    #if defined( __GNUC__ )
-        typedef cl_uchar    __cl_uchar16    __attribute__((vector_size(16)));
-        typedef cl_char     __cl_char16     __attribute__((vector_size(16)));
-        typedef cl_ushort   __cl_ushort8    __attribute__((vector_size(16)));
-        typedef cl_short    __cl_short8     __attribute__((vector_size(16)));
-        typedef cl_uint     __cl_uint4      __attribute__((vector_size(16)));
-        typedef cl_int      __cl_int4       __attribute__((vector_size(16)));
-        typedef cl_ulong    __cl_ulong2     __attribute__((vector_size(16)));
-        typedef cl_long     __cl_long2      __attribute__((vector_size(16)));
-        typedef cl_double   __cl_double2    __attribute__((vector_size(16)));
-    #else
-        typedef __m128i __cl_uchar16;
-        typedef __m128i __cl_char16;
-        typedef __m128i __cl_ushort8;
-        typedef __m128i __cl_short8;
-        typedef __m128i __cl_uint4;
-        typedef __m128i __cl_int4;
-        typedef __m128i __cl_ulong2;
-        typedef __m128i __cl_long2;
-        typedef __m128d __cl_double2;
-    #endif
-    #define __CL_UCHAR16__  1
-    #define __CL_CHAR16__   1
-    #define __CL_USHORT8__  1
-    #define __CL_SHORT8__   1
-    #define __CL_INT4__     1
-    #define __CL_UINT4__    1
-    #define __CL_ULONG2__   1
-    #define __CL_LONG2__    1
-    #define __CL_DOUBLE2__  1
-#if defined( __MMX__ )
-    #include <mmintrin.h>
-    #if defined( __GNUC__ )
-        typedef cl_uchar    __cl_uchar8     __attribute__((vector_size(8)));
-        typedef cl_char     __cl_char8      __attribute__((vector_size(8)));
-        typedef cl_ushort   __cl_ushort4    __attribute__((vector_size(8)));
-        typedef cl_short    __cl_short4     __attribute__((vector_size(8)));
-        typedef cl_uint     __cl_uint2      __attribute__((vector_size(8)));
-        typedef cl_int      __cl_int2       __attribute__((vector_size(8)));
-        typedef cl_ulong    __cl_ulong1     __attribute__((vector_size(8)));
-        typedef cl_long     __cl_long1      __attribute__((vector_size(8)));
-        typedef cl_float    __cl_float2     __attribute__((vector_size(8)));
-    #else
-        typedef __m64       __cl_uchar8;
-        typedef __m64       __cl_char8;
-        typedef __m64       __cl_ushort4;
-        typedef __m64       __cl_short4;
-        typedef __m64       __cl_uint2;
-        typedef __m64       __cl_int2;
-        typedef __m64       __cl_ulong1;
-        typedef __m64       __cl_long1;
-        typedef __m64       __cl_float2;
-    #endif
-    #define __CL_UCHAR8__   1
-    #define __CL_CHAR8__    1
-    #define __CL_USHORT4__  1
-    #define __CL_SHORT4__   1
-    #define __CL_INT2__     1
-    #define __CL_UINT2__    1
-    #define __CL_ULONG1__   1
-    #define __CL_LONG1__    1
-    #define __CL_FLOAT2__   1
-#if defined( __AVX__ )
-    #if defined( __MINGW64__ )
-        #include <intrin.h>
-    #else
-        #include <immintrin.h>
-    #endif
-    #if defined( __GNUC__ )
-        typedef cl_float    __cl_float8     __attribute__((vector_size(32)));
-        typedef cl_double   __cl_double4    __attribute__((vector_size(32)));
-    #else
-        typedef __m256      __cl_float8;
-        typedef __m256d     __cl_double4;
-    #endif
-    #define __CL_FLOAT8__   1
-    #define __CL_DOUBLE4__  1
-/* Define capabilities for anonymous struct members. */
-#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
-#define  __CL_HAS_ANON_STRUCT__ 1
-#define  __CL_ANON_STRUCT__
-#elif defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
-#define  __CL_HAS_ANON_STRUCT__ 1
-#define  __CL_ANON_STRUCT__ __extension__
-#elif defined( _WIN32) && defined(_MSC_VER) && ! defined(__STDC__)
-    #if _MSC_VER >= 1500
-   /* Microsoft Developer Studio 2008 supports anonymous structs, but
-    * complains by default. */
-    #define  __CL_HAS_ANON_STRUCT__ 1
-    #define  __CL_ANON_STRUCT__
-   /* Disable warning C4201: nonstandard extension used : nameless
-    * struct/union */
-    #pragma warning( push )
-    #pragma warning( disable : 4201 )
-    #endif
-#define  __CL_HAS_ANON_STRUCT__ 0
-#define  __CL_ANON_STRUCT__
-/* Define alignment keys */
-#if defined( __GNUC__ ) || defined(__INTEGRITY)
-    #define CL_ALIGNED(_x)          __attribute__ ((aligned(_x)))
-#elif defined( _WIN32) && (_MSC_VER)
-    /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements     */
-    /*                                                 */
-    /* #include <crtdefs.h>                                                                                             */
-    /* #define CL_ALIGNED(_x)          _CRT_ALIGN(_x)                                                                   */
-    #define CL_ALIGNED(_x)
-   #warning  Need to implement some method to align data here
-   #define  CL_ALIGNED(_x)
-/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */
-    /* .xyzw and .s0123...{f|F} are supported */
-    /* .hi and .lo are supported */
-/* Define cl_vector types */
-/* ---- cl_charn ---- */
-typedef union
-    cl_char  CL_ALIGNED(2) s[2];
-   __CL_ANON_STRUCT__ struct{ cl_char  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_char  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_char  lo, hi; };
-#if defined( __CL_CHAR2__)
-    __cl_char2     v2;
-typedef union
-    cl_char  CL_ALIGNED(4) s[4];
-   __CL_ANON_STRUCT__ struct{ cl_char  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_char  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; };
-#if defined( __CL_CHAR2__)
-    __cl_char2     v2[2];
-#if defined( __CL_CHAR4__)
-    __cl_char4     v4;
-/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */
-typedef  cl_char4  cl_char3;
-typedef union
-    cl_char   CL_ALIGNED(8) s[8];
-   __CL_ANON_STRUCT__ struct{ cl_char  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_char  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; };
-#if defined( __CL_CHAR2__)
-    __cl_char2     v2[4];
-#if defined( __CL_CHAR4__)
-    __cl_char4     v4[2];
-#if defined( __CL_CHAR8__ )
-    __cl_char8     v8;
-typedef union
-    cl_char  CL_ALIGNED(16) s[16];
-   __CL_ANON_STRUCT__ struct{ cl_char  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_char  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; };
-#if defined( __CL_CHAR2__)
-    __cl_char2     v2[8];
-#if defined( __CL_CHAR4__)
-    __cl_char4     v4[4];
-#if defined( __CL_CHAR8__ )
-    __cl_char8     v8[2];
-#if defined( __CL_CHAR16__ )
-    __cl_char16    v16;
-/* ---- cl_ucharn ---- */
-typedef union
-    cl_uchar  CL_ALIGNED(2) s[2];
-   __CL_ANON_STRUCT__ struct{ cl_uchar  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar  lo, hi; };
-#if defined( __cl_uchar2__)
-    __cl_uchar2     v2;
-typedef union
-    cl_uchar  CL_ALIGNED(4) s[4];
-   __CL_ANON_STRUCT__ struct{ cl_uchar  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; };
-#if defined( __CL_UCHAR2__)
-    __cl_uchar2     v2[2];
-#if defined( __CL_UCHAR4__)
-    __cl_uchar4     v4;
-/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */
-typedef  cl_uchar4  cl_uchar3;
-typedef union
-    cl_uchar   CL_ALIGNED(8) s[8];
-   __CL_ANON_STRUCT__ struct{ cl_uchar  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; };
-#if defined( __CL_UCHAR2__)
-    __cl_uchar2     v2[4];
-#if defined( __CL_UCHAR4__)
-    __cl_uchar4     v4[2];
-#if defined( __CL_UCHAR8__ )
-    __cl_uchar8     v8;
-typedef union
-    cl_uchar  CL_ALIGNED(16) s[16];
-   __CL_ANON_STRUCT__ struct{ cl_uchar  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; };
-#if defined( __CL_UCHAR2__)
-    __cl_uchar2     v2[8];
-#if defined( __CL_UCHAR4__)
-    __cl_uchar4     v4[4];
-#if defined( __CL_UCHAR8__ )
-    __cl_uchar8     v8[2];
-#if defined( __CL_UCHAR16__ )
-    __cl_uchar16    v16;
-/* ---- cl_shortn ---- */
-typedef union
-    cl_short  CL_ALIGNED(4) s[2];
-   __CL_ANON_STRUCT__ struct{ cl_short  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_short  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_short  lo, hi; };
-#if defined( __CL_SHORT2__)
-    __cl_short2     v2;
-typedef union
-    cl_short  CL_ALIGNED(8) s[4];
-   __CL_ANON_STRUCT__ struct{ cl_short  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_short  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; };
-#if defined( __CL_SHORT2__)
-    __cl_short2     v2[2];
-#if defined( __CL_SHORT4__)
-    __cl_short4     v4;
-/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */
-typedef  cl_short4  cl_short3;
-typedef union
-    cl_short   CL_ALIGNED(16) s[8];
-   __CL_ANON_STRUCT__ struct{ cl_short  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; };
-#if defined( __CL_SHORT2__)
-    __cl_short2     v2[4];
-#if defined( __CL_SHORT4__)
-    __cl_short4     v4[2];
-#if defined( __CL_SHORT8__ )
-    __cl_short8     v8;
-typedef union
-    cl_short  CL_ALIGNED(32) s[16];
-   __CL_ANON_STRUCT__ struct{ cl_short  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; };
-#if defined( __CL_SHORT2__)
-    __cl_short2     v2[8];
-#if defined( __CL_SHORT4__)
-    __cl_short4     v4[4];
-#if defined( __CL_SHORT8__ )
-    __cl_short8     v8[2];
-#if defined( __CL_SHORT16__ )
-    __cl_short16    v16;
-/* ---- cl_ushortn ---- */
-typedef union
-    cl_ushort  CL_ALIGNED(4) s[2];
-   __CL_ANON_STRUCT__ struct{ cl_ushort  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort  lo, hi; };
-#if defined( __CL_USHORT2__)
-    __cl_ushort2     v2;
-typedef union
-    cl_ushort  CL_ALIGNED(8) s[4];
-   __CL_ANON_STRUCT__ struct{ cl_ushort  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; };
-#if defined( __CL_USHORT2__)
-    __cl_ushort2     v2[2];
-#if defined( __CL_USHORT4__)
-    __cl_ushort4     v4;
-/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */
-typedef  cl_ushort4  cl_ushort3;
-typedef union
-    cl_ushort   CL_ALIGNED(16) s[8];
-   __CL_ANON_STRUCT__ struct{ cl_ushort  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; };
-#if defined( __CL_USHORT2__)
-    __cl_ushort2     v2[4];
-#if defined( __CL_USHORT4__)
-    __cl_ushort4     v4[2];
-#if defined( __CL_USHORT8__ )
-    __cl_ushort8     v8;
-typedef union
-    cl_ushort  CL_ALIGNED(32) s[16];
-   __CL_ANON_STRUCT__ struct{ cl_ushort  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; };
-#if defined( __CL_USHORT2__)
-    __cl_ushort2     v2[8];
-#if defined( __CL_USHORT4__)
-    __cl_ushort4     v4[4];
-#if defined( __CL_USHORT8__ )
-    __cl_ushort8     v8[2];
-#if defined( __CL_USHORT16__ )
-    __cl_ushort16    v16;
-/* ---- cl_halfn ---- */
-typedef union
-    cl_half  CL_ALIGNED(4) s[2];
-    __CL_ANON_STRUCT__ struct{ cl_half  x, y; };
-    __CL_ANON_STRUCT__ struct{ cl_half  s0, s1; };
-    __CL_ANON_STRUCT__ struct{ cl_half  lo, hi; };
-#if defined( __CL_HALF2__)
-    __cl_half2     v2;
-typedef union
-    cl_half  CL_ALIGNED(8) s[4];
-    __CL_ANON_STRUCT__ struct{ cl_half  x, y, z, w; };
-    __CL_ANON_STRUCT__ struct{ cl_half  s0, s1, s2, s3; };
-    __CL_ANON_STRUCT__ struct{ cl_half2 lo, hi; };
-#if defined( __CL_HALF2__)
-    __cl_half2     v2[2];
-#if defined( __CL_HALF4__)
-    __cl_half4     v4;
-/* cl_half3 is identical in size, alignment and behavior to cl_half4. See section 6.1.5. */
-typedef  cl_half4  cl_half3;
-typedef union
-    cl_half   CL_ALIGNED(16) s[8];
-    __CL_ANON_STRUCT__ struct{ cl_half  x, y, z, w; };
-    __CL_ANON_STRUCT__ struct{ cl_half  s0, s1, s2, s3, s4, s5, s6, s7; };
-    __CL_ANON_STRUCT__ struct{ cl_half4 lo, hi; };
-#if defined( __CL_HALF2__)
-    __cl_half2     v2[4];
-#if defined( __CL_HALF4__)
-    __cl_half4     v4[2];
-#if defined( __CL_HALF8__ )
-    __cl_half8     v8;
-typedef union
-    cl_half  CL_ALIGNED(32) s[16];
-    __CL_ANON_STRUCT__ struct{ cl_half  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-    __CL_ANON_STRUCT__ struct{ cl_half  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-    __CL_ANON_STRUCT__ struct{ cl_half8 lo, hi; };
-#if defined( __CL_HALF2__)
-    __cl_half2     v2[8];
-#if defined( __CL_HALF4__)
-    __cl_half4     v4[4];
-#if defined( __CL_HALF8__ )
-    __cl_half8     v8[2];
-#if defined( __CL_HALF16__ )
-    __cl_half16    v16;
-/* ---- cl_intn ---- */
-typedef union
-    cl_int  CL_ALIGNED(8) s[2];
-   __CL_ANON_STRUCT__ struct{ cl_int  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_int  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_int  lo, hi; };
-#if defined( __CL_INT2__)
-    __cl_int2     v2;
-typedef union
-    cl_int  CL_ALIGNED(16) s[4];
-   __CL_ANON_STRUCT__ struct{ cl_int  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_int  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; };
-#if defined( __CL_INT2__)
-    __cl_int2     v2[2];
-#if defined( __CL_INT4__)
-    __cl_int4     v4;
-/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */
-typedef  cl_int4  cl_int3;
-typedef union
-    cl_int   CL_ALIGNED(32) s[8];
-   __CL_ANON_STRUCT__ struct{ cl_int  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; };
-#if defined( __CL_INT2__)
-    __cl_int2     v2[4];
-#if defined( __CL_INT4__)
-    __cl_int4     v4[2];
-#if defined( __CL_INT8__ )
-    __cl_int8     v8;
-typedef union
-    cl_int  CL_ALIGNED(64) s[16];
-   __CL_ANON_STRUCT__ struct{ cl_int  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; };
-#if defined( __CL_INT2__)
-    __cl_int2     v2[8];
-#if defined( __CL_INT4__)
-    __cl_int4     v4[4];
-#if defined( __CL_INT8__ )
-    __cl_int8     v8[2];
-#if defined( __CL_INT16__ )
-    __cl_int16    v16;
-/* ---- cl_uintn ---- */
-typedef union
-    cl_uint  CL_ALIGNED(8) s[2];
-   __CL_ANON_STRUCT__ struct{ cl_uint  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_uint  lo, hi; };
-#if defined( __CL_UINT2__)
-    __cl_uint2     v2;
-typedef union
-    cl_uint  CL_ALIGNED(16) s[4];
-   __CL_ANON_STRUCT__ struct{ cl_uint  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; };
-#if defined( __CL_UINT2__)
-    __cl_uint2     v2[2];
-#if defined( __CL_UINT4__)
-    __cl_uint4     v4;
-/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */
-typedef  cl_uint4  cl_uint3;
-typedef union
-    cl_uint   CL_ALIGNED(32) s[8];
-   __CL_ANON_STRUCT__ struct{ cl_uint  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; };
-#if defined( __CL_UINT2__)
-    __cl_uint2     v2[4];
-#if defined( __CL_UINT4__)
-    __cl_uint4     v4[2];
-#if defined( __CL_UINT8__ )
-    __cl_uint8     v8;
-typedef union
-    cl_uint  CL_ALIGNED(64) s[16];
-   __CL_ANON_STRUCT__ struct{ cl_uint  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; };
-#if defined( __CL_UINT2__)
-    __cl_uint2     v2[8];
-#if defined( __CL_UINT4__)
-    __cl_uint4     v4[4];
-#if defined( __CL_UINT8__ )
-    __cl_uint8     v8[2];
-#if defined( __CL_UINT16__ )
-    __cl_uint16    v16;
-/* ---- cl_longn ---- */
-typedef union
-    cl_long  CL_ALIGNED(16) s[2];
-   __CL_ANON_STRUCT__ struct{ cl_long  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_long  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_long  lo, hi; };
-#if defined( __CL_LONG2__)
-    __cl_long2     v2;
-typedef union
-    cl_long  CL_ALIGNED(32) s[4];
-   __CL_ANON_STRUCT__ struct{ cl_long  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_long  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; };
-#if defined( __CL_LONG2__)
-    __cl_long2     v2[2];
-#if defined( __CL_LONG4__)
-    __cl_long4     v4;
-/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */
-typedef  cl_long4  cl_long3;
-typedef union
-    cl_long   CL_ALIGNED(64) s[8];
-   __CL_ANON_STRUCT__ struct{ cl_long  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; };
-#if defined( __CL_LONG2__)
-    __cl_long2     v2[4];
-#if defined( __CL_LONG4__)
-    __cl_long4     v4[2];
-#if defined( __CL_LONG8__ )
-    __cl_long8     v8;
-typedef union
-    cl_long  CL_ALIGNED(128) s[16];
-   __CL_ANON_STRUCT__ struct{ cl_long  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; };
-#if defined( __CL_LONG2__)
-    __cl_long2     v2[8];
-#if defined( __CL_LONG4__)
-    __cl_long4     v4[4];
-#if defined( __CL_LONG8__ )
-    __cl_long8     v8[2];
-#if defined( __CL_LONG16__ )
-    __cl_long16    v16;
-/* ---- cl_ulongn ---- */
-typedef union
-    cl_ulong  CL_ALIGNED(16) s[2];
-   __CL_ANON_STRUCT__ struct{ cl_ulong  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong  lo, hi; };
-#if defined( __CL_ULONG2__)
-    __cl_ulong2     v2;
-typedef union
-    cl_ulong  CL_ALIGNED(32) s[4];
-   __CL_ANON_STRUCT__ struct{ cl_ulong  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; };
-#if defined( __CL_ULONG2__)
-    __cl_ulong2     v2[2];
-#if defined( __CL_ULONG4__)
-    __cl_ulong4     v4;
-/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */
-typedef  cl_ulong4  cl_ulong3;
-typedef union
-    cl_ulong   CL_ALIGNED(64) s[8];
-   __CL_ANON_STRUCT__ struct{ cl_ulong  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; };
-#if defined( __CL_ULONG2__)
-    __cl_ulong2     v2[4];
-#if defined( __CL_ULONG4__)
-    __cl_ulong4     v4[2];
-#if defined( __CL_ULONG8__ )
-    __cl_ulong8     v8;
-typedef union
-    cl_ulong  CL_ALIGNED(128) s[16];
-   __CL_ANON_STRUCT__ struct{ cl_ulong  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; };
-#if defined( __CL_ULONG2__)
-    __cl_ulong2     v2[8];
-#if defined( __CL_ULONG4__)
-    __cl_ulong4     v4[4];
-#if defined( __CL_ULONG8__ )
-    __cl_ulong8     v8[2];
-#if defined( __CL_ULONG16__ )
-    __cl_ulong16    v16;
-/* --- cl_floatn ---- */
-typedef union
-    cl_float  CL_ALIGNED(8) s[2];
-   __CL_ANON_STRUCT__ struct{ cl_float  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_float  s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_float  lo, hi; };
-#if defined( __CL_FLOAT2__)
-    __cl_float2     v2;
-typedef union
-    cl_float  CL_ALIGNED(16) s[4];
-   __CL_ANON_STRUCT__ struct{ cl_float   x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_float   s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_float2  lo, hi; };
-#if defined( __CL_FLOAT2__)
-    __cl_float2     v2[2];
-#if defined( __CL_FLOAT4__)
-    __cl_float4     v4;
-/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */
-typedef  cl_float4  cl_float3;
-typedef union
-    cl_float   CL_ALIGNED(32) s[8];
-   __CL_ANON_STRUCT__ struct{ cl_float   x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_float   s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_float4  lo, hi; };
-#if defined( __CL_FLOAT2__)
-    __cl_float2     v2[4];
-#if defined( __CL_FLOAT4__)
-    __cl_float4     v4[2];
-#if defined( __CL_FLOAT8__ )
-    __cl_float8     v8;
-typedef union
-    cl_float  CL_ALIGNED(64) s[16];
-   __CL_ANON_STRUCT__ struct{ cl_float  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_float  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; };
-#if defined( __CL_FLOAT2__)
-    __cl_float2     v2[8];
-#if defined( __CL_FLOAT4__)
-    __cl_float4     v4[4];
-#if defined( __CL_FLOAT8__ )
-    __cl_float8     v8[2];
-#if defined( __CL_FLOAT16__ )
-    __cl_float16    v16;
-/* --- cl_doublen ---- */
-typedef union
-    cl_double  CL_ALIGNED(16) s[2];
-   __CL_ANON_STRUCT__ struct{ cl_double  x, y; };
-   __CL_ANON_STRUCT__ struct{ cl_double s0, s1; };
-   __CL_ANON_STRUCT__ struct{ cl_double lo, hi; };
-#if defined( __CL_DOUBLE2__)
-    __cl_double2     v2;
-typedef union
-    cl_double  CL_ALIGNED(32) s[4];
-   __CL_ANON_STRUCT__ struct{ cl_double  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_double  s0, s1, s2, s3; };
-   __CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; };
-#if defined( __CL_DOUBLE2__)
-    __cl_double2     v2[2];
-#if defined( __CL_DOUBLE4__)
-    __cl_double4     v4;
-/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */
-typedef  cl_double4  cl_double3;
-typedef union
-    cl_double   CL_ALIGNED(64) s[8];
-   __CL_ANON_STRUCT__ struct{ cl_double  x, y, z, w; };
-   __CL_ANON_STRUCT__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7; };
-   __CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; };
-#if defined( __CL_DOUBLE2__)
-    __cl_double2     v2[4];
-#if defined( __CL_DOUBLE4__)
-    __cl_double4     v4[2];
-#if defined( __CL_DOUBLE8__ )
-    __cl_double8     v8;
-typedef union
-    cl_double  CL_ALIGNED(128) s[16];
-   __CL_ANON_STRUCT__ struct{ cl_double  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
-   __CL_ANON_STRUCT__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
-   __CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; };
-#if defined( __CL_DOUBLE2__)
-    __cl_double2     v2[8];
-#if defined( __CL_DOUBLE4__)
-    __cl_double4     v4[4];
-#if defined( __CL_DOUBLE8__ )
-    __cl_double8     v8[2];
-#if defined( __CL_DOUBLE16__ )
-    __cl_double16    v16;
-/* Macro to facilitate debugging
- * Usage:
- *   Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source.
- *   The first line ends with:   CL_PROGRAM_STRING_DEBUG_INFO \"
- *   Each line thereafter of OpenCL C source must end with: \n\
- *   The last line ends in ";
- *
- *   Example:
- *
- *   const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\
- *   kernel void foo( int a, float * b )             \n\
- *   {                                               \n\
- *      // my comment                                \n\
- *      *b[ get_global_id(0)] = a;                   \n\
- *   }                                               \n\
- *   ";
- *
- * This should correctly set up the line, (column) and file information for your source
- * string so you can do source level debugging.
- */
-#define  __CL_STRINGIFY( _x )               # _x
-#define  _CL_STRINGIFY( _x )                __CL_STRINGIFY( _x )
-#define  CL_PROGRAM_STRING_DEBUG_INFO       "#line "  _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n"
-#ifdef __cplusplus
-#if defined( _WIN32) && defined(_MSC_VER) && ! defined(__STDC__)
-    #if _MSC_VER >=1500
-    #pragma warning( pop )
-    #endif
-#endif  /* __CL_PLATFORM_H  */
diff --git a/dependencies/ocl-headers/CL/cl_version.h b/dependencies/ocl-headers/CL/cl_version.h
deleted file mode 100644
index 3844938..0000000
--- a/dependencies/ocl-headers/CL/cl_version.h
+++ /dev/null
@@ -1,81 +0,0 @@
- * Copyright (c) 2018-2020 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-#ifndef __CL_VERSION_H
-#define __CL_VERSION_H
-/* Detect which version to target */
-#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 300 (OpenCL 3.0)")
-#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). Defaulting to 300 (OpenCL 3.0)")
-/* OpenCL Version */
-#if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0)
-#define CL_VERSION_3_0  1
-#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
-#define CL_VERSION_2_2  1
-#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
-#define CL_VERSION_2_1  1
-#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
-#define CL_VERSION_2_0  1
-#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
-#define CL_VERSION_1_2  1
-#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
-#define CL_VERSION_1_1  1
-#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
-#define CL_VERSION_1_0  1
-/* Allow deprecated APIs for older OpenCL versions. */
-#endif  /* __CL_VERSION_H */
diff --git a/dependencies/ocl-headers/CL/opencl.h b/dependencies/ocl-headers/CL/opencl.h
deleted file mode 100644
index ef8dd1e..0000000
--- a/dependencies/ocl-headers/CL/opencl.h
+++ /dev/null
@@ -1,32 +0,0 @@
- * Copyright (c) 2008-2021 The Khronos Group Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
-#ifndef __OPENCL_H
-#define __OPENCL_H
-#ifdef __cplusplus
-extern "C" {
-#include <CL/cl.h>
-#include <CL/cl_gl.h>
-#include <CL/cl_ext.h>
-#ifdef __cplusplus
-#endif  /* __OPENCL_H   */
diff --git a/dependencies/ocl-stubs/ b/dependencies/ocl-stubs/
deleted file mode 100644
index 8cc0954..0000000
--- a/dependencies/ocl-stubs/
+++ /dev/null
@@ -1,164 +0,0 @@
-#!/usr/bin/env python3
-""" - Generate a C++ interface that automates loading OpenCL.
-Usage: <headerPaths...>
-The generated code looks roughly like this:
-// apis.h
-CL_MACRO ( returnType, funcname, (fargs...), (callArgs...) )
-import os.path
-import re
-import sys
- * This file is generated by {}
- * Do not edit this file directly.
- */""".format(os.path.basename(__file__))
-#ifndef CL_MACRO
-#error You need to define CL_MACRO before including apis
-def include_for_header(header):
-  return '#include <CL/{}>'.format(header)
-def extract_license_lines(lines):
-  license_lines = []
-  for line in lines:
-    license_lines.append(line)
-    if line.find('*/') != -1:
-      return license_lines
-  sys.exit("License text didn't terminate")
-assert (extract_license_lines(['/* LICENSE */',
-                               'something']) == ['/* LICENSE */'])
-assert (extract_license_lines(['/* LICENSE', ' * TEXT */',
-                               'something']) == ['/* LICENSE', ' * TEXT */'])
-assert (extract_license_lines(['/* LICENSE', ' * TEXT', ' */', 'something'
-                              ]) == ['/* LICENSE', ' * TEXT', ' */'])
-def parse_arg_strs(str):
-  paren_depth = 0
-  current_arg = ''
-  ret = []
-  for c in str:
-    if c == '(':
-      paren_depth += 1
-    elif c == ')':
-      paren_depth -= 1
-    if c == ',' and paren_depth == 0:
-      ret.append(current_arg)
-      current_arg = ''
-    else:
-      current_arg += c
-  if current_arg != '':
-    ret.append(current_arg)
-  return ret
-def process_type(raw):
-  # strip redundant [] (where one is before the name)
-  raw = re.sub(r'(\[[0-9]*\])\s*(\w+)\s*\[[0-9]*\]', r'\2\1', raw)
-  # strip cases where the name comment hinted at the number of elements in an array
-  raw = re.sub(r'\*\s*(\w+)\s*\[[0-9]+\]', r'*\1', raw)
-  raw = ' '.join(raw.split())
-  return raw
-def parse_api(api_signature):
-  m = None
-  api_signature = re.sub('extern', '', api_signature)
-  api_signature = re.sub('CL_\w+', '', api_signature)
-  m = re.match(r'\s*(.*)\s+(\w+)\((.*)\)\s*;', api_signature)
-  if m == None:
-    print(api_signature)
-  assert (m is not None)
-  assert (len(m.groups()) == 3)
-  arg_strs = None
-  if re.match('\s*void\s*',
-    arg_strs = []
-  else:
-    arg_strs = parse_arg_strs(
-  args = []
-  for arg_str in arg_strs:
-    nm ='(\w+)\s*(\)|\[|$)', arg_str)
-    assert (nm is not None)
-    args.append({'type': process_type(arg_str), 'name':})
-  return {'return':, 'name':, 'args': args}
-def extract_apis(lines):
-  state = 'scanning'
-  apis = []
-  api_signature = ''
-  for line in lines:
-    if state == 'scanning':
-      if line.find('CL_API_ENTRY') != -1 and line.find('typedef') == -1:
-        api_signature = line
-        if line.find(';') != -1:
-          apis.append(
-              parse_api(
-                  api_signature.replace('/*', '').replace('*/', '').replace(
-                      'CL_CALLBACK ', '')))
-          api_signature = ''
-        else:
-          state = 'expectAPILine'
-    elif state == 'expectAPILine':
-      api_signature += ' ' + line
-      if line.find(';') != -1:
-        apis.append(
-            parse_api(
-                api_signature.replace('/*', '').replace('*/', '').replace(
-                    'CL_CALLBACK ', '')))
-        api_signature = ''
-        state = 'scanning'
-  return apis
-def generate_apis(apis):
-  print()
-  print(MACRO_GUARD)
-  print()
-  for api in apis:
-    fargs = (arg['type'] for arg in api['args'])
-    cargs = (arg['name'] for arg in api['args'])
-    print('CL_MACRO( {}, {}, ({}), ({}) )\n'.format(api['return'], api['name'],
-                                                    ', '.join(fargs),
-                                                    ', '.join(cargs)))
-def main():
-  headers = sys.argv[1:]
-  apis = []
-  with open(headers[0]) as header:
-    lines = [line.strip() for line in header.readlines()]
-    license_lines = extract_license_lines(lines)
-  for header_name in headers:
-    with open(header_name) as header:
-      lines = [line.strip() for line in header.readlines()]
-      apis = apis + extract_apis(lines)
-  generate_apis(apis)
-if __name__ == '__main__':
-  main()
diff --git a/dependencies/ocl-stubs/stubs.cpp b/dependencies/ocl-stubs/stubs.cpp
deleted file mode 100644
index 2cf3700..0000000
--- a/dependencies/ocl-stubs/stubs.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-#include <CL/cl.h>
-#include <CL/cl_gl.h>
-#include <CL/cl_egl.h>
-#include <CL/cl_ext.h>
-#include <CL/cl_gl_ext.h>
-#include <dlfcn.h>
-void* libHandle = nullptr;
-#define FUNC_TYPES(rettype, fname, fargs, callArgs)     \
-typedef rettype (* fname ## _t) fargs;
-#define FUNC_SYM(rettype, fname, fargs, callArgs)                                                     \
-rettype fname fargs {                                                                                 \
-  if (!libHandle)                                                                                     \
-    libHandle = dlopen( "", RTLD_NOW | RTLD_GLOBAL );                                     \
-                                                                                                      \
-  static fname ## _t func = nullptr;                                                                  \
-  if (!func)                                                                                          \
-    func = reinterpret_cast< fname ## _t >(dlsym(libHandle, #fname));                                 \
-                                                                                                      \
-  return func callArgs;                                                                               \
-                                                                                                      \
-#include "apis.h"
-#undef CL_MACRO
-#include "apis.h"
-#undef CL_MACRO
diff --git a/ b/
deleted file mode 100755
index 6fc037c..0000000
--- a/
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/bin/env bash
-set -e
-export TOP=$(pwd)
-# Prepare toolchain if needed
-if [[ ${JOB_ARCHITECTURE} != "" ]]; then
-    wget ${TOOLCHAIN_URL}
-    tar xf ${TOOLCHAIN_ARCHIVE}
-    export PATH=${TOOLCHAIN_DIR}/bin:${PATH}
-if [[ ( ${JOB_ARCHITECTURE} == "" && ${JOB_ENABLE_GL} == "1" ) ]]; then
-    sudo apt-get update
-    sudo apt-get -y install libglu1-mesa-dev freeglut3-dev mesa-common-dev libglew-dev
-# Prepare headers
-git clone
-cd OpenCL-Headers
-ln -s CL OpenCL # For OSX builds
-cd ..
-# Get and build loader
-git clone
-cd ${TOP}/OpenCL-ICD-Loader
-mkdir build
-cd build
-# Build CTS
-cd ${TOP}
-ls -l
-mkdir build
-cd build
-cmake -DCL_INCLUDE_DIR=${TOP}/OpenCL-Headers \
-      -DCL_LIB_DIR=${TOP}/OpenCL-ICD-Loader/build \
-      -DOPENCL_LIBRARIES="-lOpenCL -lpthread" \
-      ..
-make -j2
diff --git a/scripts/android_bp_head b/scripts/android_bp_head
deleted file mode 100644
index c5cd394..0000000
--- a/scripts/android_bp_head
+++ /dev/null
@@ -1,121 +0,0 @@
-package {
-    default_applicable_licenses: ["external_OpenCL-CTS_license"],
-// Added automatically by a large-scale-change that took the approach of
-// 'apply every license found to every target'. While this makes sure we respect
-// every license restriction, it may not be entirely correct.
-// e.g. GPL in an MIT project might only apply to the contrib/ directory.
-// Please consider splitting the single license below into multiple licenses,
-// taking care not to lose any license_kind information, and overriding the
-// default license using the 'licenses: [...]' property on targets as needed.
-// For unused files, consider creating a 'fileGroup' with "//visibility:private"
-// to attach the license to, and including a comment whether the files may be
-// used in the current project.
-// See: http://go/android-license-faq
-license {
-    name: "external_OpenCL-CTS_license",
-    visibility: [":__subpackages__"],
-    license_kinds: [
-        "SPDX-license-identifier-Apache-2.0",
-        "SPDX-license-identifier-BSD",
-        "SPDX-license-identifier-MIT",
-        "SPDX-license-identifier-Unlicense",
-        "legacy_by_exception_only", // by exception only
-        "legacy_proprietary", // by exception only
-        "legacy_unencumbered",
-    ],
-    license_text: [
-        "LICENSE.txt",
-    ],
-cc_library_headers {
-    name: "ocl-harness-headers",
-    export_include_dirs: [
-        "test_common/harness",
-        "test_common"
-    ]
-cc_defaults {
-    name: "ocl-harness-defaults",
-    header_libs: [
-        "ocl-harness-headers",
-    ],
-    export_header_lib_headers: [
-        "ocl-harness-headers",
-    ],
-    cflags: [
-        "-Wno-#warnings",
-        "-Wno-absolute-value",
-        "-Wno-asm-operand-widths",
-        "-Wno-c++11-narrowing",
-        "-Wno-dangling-else",
-        "-Wno-date-time",
-        "-Wno-deprecated-declarations",
-        "-Wno-format",
-        "-Wno-ignored-pragmas",
-        "-Wno-ignored-qualifiers",
-        "-Wno-implicit-fallthrough",
-        "-Wno-logical-op-parentheses",
-        "-Wno-macro-redefined",
-        "-Wno-missing-braces",
-        "-Wno-missing-declarations",
-        "-Wno-missing-field-initializers",
-        "-Wno-non-virtual-dtor",
-        "-Wno-overloaded-virtual",
-        "-Wno-parentheses",
-        "-Wno-parentheses-equality",
-        "-Wno-reorder-ctor",
-        "-Wno-return-stack-address",
-        "-Wno-shift-negative-value",
-        "-Wno-sometimes-uninitialized",
-        "-Wno-switch",
-        "-Wno-unknown-pragmas",
-        "-Wno-unneeded-internal-declaration",
-        "-Wno-unused-function",
-        "-Wno-unused-label",
-        "-Wno-unused-parameter",
-        "-Wno-unused-variable",
-        "-Wno-writable-strings",
-        "-fexceptions",
-    ],
-    static_libs: [
-        "ocl-stubs"
-    ],
-cc_library {
-    name: "ocl-harness",
-    srcs: [ "test_common/harness/*.cpp", ],
-    defaults: [ "ocl-harness-defaults" ],
-cc_defaults {
-    name: "ocl-test-defaults",
-    defaults: [ "ocl-harness-defaults" ],
-    static_libs: [ "ocl-harness" ],
-    compile_multilib: "64",
-    multilib: {
-        lib64: {
-            suffix: "64",
-        },
-    },
-cc_defaults {
-    name: "ocl-test-image-defaults",
-    srcs: [ "test_conformance/images/common.cpp" ],
-    export_include_dirs: [ "test_conformance/images" ],
-    defaults: [ "ocl-test-defaults" ],
diff --git a/scripts/android_bp_tail b/scripts/android_bp_tail
deleted file mode 100644
index c295fc1..0000000
--- a/scripts/android_bp_tail
+++ /dev/null
@@ -1,18 +0,0 @@
-python_test_host {
-    name: "opencl_cts",
-    main: "scripts/",
-    srcs: [ "scripts/" ],
-    data: [ "scripts/test_opencl_cts.xml" ],
-    test_config: "scripts/test_opencl_cts.xml",
-    version: {
-        py2: {
-            enabled: false,
-        },
-        py3: {
-            enabled: true
-        }
-    },
-    test_options: {
-        unit_test: false,
-    },
diff --git a/scripts/ b/scripts/
deleted file mode 100644
index 3d916f6..0000000
--- a/scripts/
+++ /dev/null
@@ -1,158 +0,0 @@
-import json
-import os
-import re
-from xml.dom import minidom
-from xml.etree import ElementTree
-SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
-TEST_JSON = 'tests.json'
-def write_one_cc_test(test_details, f):
-  # TODO(b/161524664): Remove this exception for spir
-  if test_details['test_name'] == 'spir':
-    return
-  stringified_sources = map(lambda s: f'"{s}"', test_details['srcs'])
-  stringified_data = map(lambda s: f'"{s}"', test_details.get('data', []))
-  stringified_cflags = map(lambda s: f'"{s}"', test_details.get('cflags', []))
-  default = "ocl-test-defaults"
-  if test_details.get('image_type', False):
-    default = "ocl-test-image-defaults"
-  rtti = test_details.get('rtti', False)
-  cc_test_string = """
-cc_test {{
-    name: "{}",
-    srcs: [ {} ],
-    data: [ {} ],
-    cflags: [ {} ],
-    defaults: [ "{}" ],
-    rtti: {},
-    gtest: false
-           ", ".join(stringified_sources),
-           ", ".join(stringified_data),
-           ", ".join(stringified_cflags),
-           default,
-           (str(rtti)).lower())
-  empty_field_regex = re.compile("^\s*\w+: \[\s*\],?$")
-  cc_test_string = '\n'.join([line for line in cc_test_string.split('\n')
-                                   if not empty_field_regex.match(line)])
-  f.write(cc_test_string)
-def generate_android_bp():
-  android_bp_head_path = os.path.join(SCRIPT_DIR, 'android_bp_head')
-  android_bp_tail_path = os.path.join(SCRIPT_DIR, 'android_bp_tail')
-  with open('Android.bp', 'w') as android_bp:
-    with open(android_bp_head_path, 'r') as android_bp_head:
-      android_bp.write(
-    with open(TEST_JSON_PATH) as f:
-      tests = json.load(f)
-    for test in tests:
-      write_one_cc_test(test, android_bp)
-    with open(android_bp_tail_path, 'r') as android_bp_tail:
-      android_bp.write(
-def create_subelement_with_attribs(element, tag, attribs):
-  subelement = ElementTree.SubElement(element, tag)
-  for key, value in attribs.items():
-    subelement.attrib[key] = value
-  return subelement
-def generate_push_file_rules(configuration):
-  create_subelement_with_attribs(configuration, 'target_preparer',
-      { 'class': "" })
-  file_pusher = create_subelement_with_attribs(configuration, 'target_preparer',
-      { 'class': "" })
-  create_subelement_with_attribs(file_pusher, 'option',
-      { 'name': "cleanup", 'value': "true" })
-  create_subelement_with_attribs(file_pusher, 'option',
-      { 'name': "append-bitness", 'value': "true" })
-  with open(TEST_JSON_PATH, "r") as f:
-    tests = json.load(f)
-  for test in tests:
-    if test.get('manual_only', False):
-      continue
-    create_subelement_with_attribs(file_pusher, 'option',
-        {
-          'name': "push-file",
-          'key': test['binary_name'],
-          'value': "/data/nativetest64/unrestricted/{}".format(test['binary_name'])
-        })
-def generate_test_rules(configuration):
-  with open(TEST_JSON_PATH, "r") as f:
-    tests = json.load(f)
-  for test in tests:
-    if test.get('manual_only', False):
-      continue
-    test_rule = create_subelement_with_attribs(configuration, 'test',
-        { 'class': "" })
-    create_subelement_with_attribs(test_rule, 'option',
-        { 'name': "par-file-name", 'value': "opencl_cts" })
-    create_subelement_with_attribs(test_rule, 'option',
-        { 'name': "inject-android-serial", 'value': "true" })
-    create_subelement_with_attribs(test_rule, 'option',
-        { 'name': "test-timeout", 'value': test.get('timeout', "30m") })
-    create_subelement_with_attribs(test_rule, 'option',
-        { 'name': "python-options", 'value': test["test_name"] })
-    create_subelement_with_attribs(test_rule, 'option',
-        { 'name': "python-options",
-          'value': "/data/nativetest64/unrestricted/{}".format(test['binary_name']) })
-    for arg in test.get('arguments', []):
-      create_subelement_with_attribs(test_rule, 'option',
-          { 'name': "python-options", 'value': arg })
-def generate_test_xml():
-  configuration = ElementTree.Element('configuration')
-  configuration.attrib['description'] = "Config to run OpenCL CTS"
-  logcat = ElementTree.SubElement(configuration, 'option')
-  logcat.attrib['name'] = "logcat-on-failure"
-  logcat.attrib['value'] = "false"
-  generate_push_file_rules(configuration)
-  generate_test_rules(configuration)
-  stringified_configuration = ElementTree.tostring(configuration, 'utf-8')
-  reparsed_configuration = minidom.parseString(stringified_configuration)
-  with open('test_opencl_cts.xml', 'w') as f:
-    f.write(reparsed_configuration.toprettyxml(indent=" "*4))
-def main():
-  generate_android_bp()
-  generate_test_xml()
-  print("Don't forget to move -")
-  print("    Android.bp -> {ANDROID_ROOT}/external/OpenCL-CTS/Android.bp")
-  print("    test_opencl_cts.xml -> {ANDROID_ROOT}/external/OpenCL-CTS/scripts/test_opencl_cts.xml")
-if __name__ == '__main__':
-  main()
diff --git a/scripts/ b/scripts/
deleted file mode 100644
index afe0c34..0000000
--- a/scripts/
+++ /dev/null
@@ -1,132 +0,0 @@
-#!/usr/bin/env python3
-from typing import List, Optional, Tuple
-import argparse
-import os
-import pipes
-import subprocess
-import sys
-import unittest
-def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
-  parser = argparse.ArgumentParser()
-  parser.add_argument('test_name', type=str, help="Name of the test")
-  parser.add_argument('binary_path', type=str,
-                      help="Full path to the binary on device")
-  parser.add_argument('--subtests', type=str, nargs='*',
-                      help="Specific subtests to run")
-  parser.add_argument('--test_args', type=str, nargs='*',
-                      help="Unfiltered arguments to pass to the run command")
-  args = parser.parse_args(args)
-  args.subtests = args.subtests or []
-  args.test_args = args.test_args or []
-  return args
-def run_command(command: str) -> Tuple[int, str, str]:
-  serial_number = os.environ.get("ANDROID_SERIAL", "")
-  if not serial_number:
-    raise "$ANDROID_SERIAL is empty, device must be specified"
-  full_command = ["adb", "-s", serial_number, "shell", command]
-  ret =
-      full_command, capture_output=True, universal_newlines=True)
-  return ret.returncode, ret.stdout, ret.stderr
-def get_all_subtests(binary_path: str) -> List[str]:
-  retcode, output, _ = run_command(f'{binary_path} --help')
-  test_name_line = "Test names"
-  index = output.find(test_name_line)
-  if index == -1:
-    return []
-  test_names_output = output[index:]
-  test_names = []
-  # Skip the first line which starts with "Test names"
-  for test_name in test_names_output.splitlines()[1:]:
-    if not test_name.startswith((" ", "\t")):
-      break
-    test_names.append(test_name.strip())
-  return test_names
-def get_subtests(binary_path: str, subtests: List[str]) -> List[str]:
-  all_subtests = set(get_all_subtests(binary_path))
-  if not subtests:
-    return all_subtests
-  subtests = set(subtests)
-  selected_subtests = subtests & all_subtests
-  remaining_subtests = subtests - all_subtests
-  if remaining_subtests:
-    print("Could not find subtests: {}".format(', '.join(remaining_subtests)),
-          file=sys.stderr)
-  return sorted(list(selected_subtests))
-class OpenCLTest(unittest.TestCase):
-  def __init__(self, test_name: str, binary_path: str, args: List[str]):
-    self._test_name = test_name
-    self._binary_path = binary_path
-    self._args = args
-    self.command = " ".join(
-        [self._binary_path, self._test_name] +
-        list(map(pipes.quote, self._args))
-    )
-    self.test_func_name = self._test_name
-    setattr(self, self.test_func_name, self.genericTest)
-    super().__init__(methodName=self.test_func_name)
-  def genericTest(self):
-    retcode, output, oerror = run_command(self.command)
-    # TODO(layog): CTS currently return non-zero return code if the
-    # implementation is missing for some API even if the API is not supported by
-    # the version reported by the driver. Need to patch upstream.
-    missing_line = f"ERROR: Test '{self._test_name}' is missing implementation"
-    if missing_line in output or missing_line in oerror:
-      self.skipTest(f"{self._test_name} API not available in the driver")
-    self.assertFalse(retcode, "Test exited with non-zero status")
-    # TODO(b/158646251): Update upstream to exit with proper error code
-    passed_line = "PASSED test."
-    self.assertTrue(passed_line in output)
-def main():
-  """main entrypoint for test runner"""
-  args = parse_args(sys.argv[1:])
-  # HACK: Name hack to report the actual test name
-  OpenCLTest.__name__ = args.test_name
-  OpenCLTest.__qualname__ = args.test_name
-  suite = unittest.TestSuite()
-  subtests = get_subtests(args.binary_path, args.subtests)
-  for subtest in subtests:
-    suite.addTest(OpenCLTest(subtest, args.binary_path, args.test_args))
-  runner = unittest.TextTestRunner(
-      stream=sys.stderr, verbosity=ANDROID_RUNNER_REQUIRED_VERBOSITY)
-if __name__ == "__main__":
-  main()
diff --git a/scripts/test_opencl_cts.xml b/scripts/test_opencl_cts.xml
deleted file mode 100644
index de9c610..0000000
--- a/scripts/test_opencl_cts.xml
+++ /dev/null
@@ -1,257 +0,0 @@
-<?xml version="1.0" ?>
-<configuration description="Config to run OpenCL CTS">
-    <option name="logcat-on-failure" value="false"/>
-    <target_preparer class=""/>
-    <target_preparer class="">
-        <option name="cleanup" value="true"/>
-        <option name="append-bitness" value="true"/>
-        <option key="ocl-test-allocations" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-allocations"/>
-        <option key="ocl-test-api" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-api"/>
-        <option key="ocl-test-atomics" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-atomics"/>
-        <option key="ocl-test-basic" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-basic"/>
-        <option key="ocl-test-buffers" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-buffers"/>
-        <option key="ocl-test-c11-atomics" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-c11-atomics"/>
-        <option key="ocl-test-commonfns" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-commonfns"/>
-        <option key="ocl-test-computeinfo" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-computeinfo"/>
-        <option key="ocl-test-contractions" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-contractions"/>
-        <option key="ocl-test-device-execution" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-device-execution"/>
-        <option key="ocl-test-device-partition" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-device-partition"/>
-        <option key="ocl-test-device-timer" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-device-timer"/>
-        <option key="ocl-test-events" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-events"/>
-        <option key="ocl-test-generic-address-space" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-generic-address-space"/>
-        <option key="ocl-test-geometrics" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-geometrics"/>
-        <option key="ocl-test-mem-host-flags" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-mem-host-flags"/>
-        <option key="ocl-test-multiple-device-context" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-multiple-device-context"/>
-        <option key="ocl-test-non-uniform-work-group" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-non-uniform-work-group"/>
-        <option key="ocl-test-pipes" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-pipes"/>
-        <option key="ocl-test-profiling" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-profiling"/>
-        <option key="ocl-test-relationals" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-relationals"/>
-        <option key="ocl-test-subgroups" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-subgroups"/>
-        <option key="ocl-test-svm" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-svm"/>
-        <option key="ocl-test-thread-dimensions" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-thread-dimensions"/>
-        <option key="ocl-test-vectors" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-vectors"/>
-        <option key="ocl-test-image-clcopyimage" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-image-clcopyimage"/>
-        <option key="ocl-test-image-clfillimage" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-image-clfillimage"/>
-        <option key="ocl-test-image-clgetinfo" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-image-clgetinfo"/>
-        <option key="ocl-test-image-clreadwriteimage" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-image-clreadwriteimage"/>
-        <option key="ocl-test-image-kernel-image-methods" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-image-kernel-image-methods"/>
-        <option key="ocl-test-image-samplerlessreads" name="push-file" value="/data/nativetest64/unrestricted/ocl-test-image-samplerlessreads"/>
-    </target_preparer>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="allocations"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-allocations"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="api"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-api"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="atomics"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-atomics"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="basic"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-basic"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="buffers"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-buffers"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="120m"/>
-        <option name="python-options" value="c11-atomics"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-c11-atomics"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="commonfns"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-commonfns"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="computeinfo"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-computeinfo"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="contractions"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-contractions"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="device-execution"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-device-execution"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="device-partition"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-device-partition"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="device-timer"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-device-timer"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="events"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-events"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="generic-address-space"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-generic-address-space"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="geometrics"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-geometrics"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="mem-host-flags"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-mem-host-flags"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="multiple-device-context"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-multiple-device-context"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="non-uniform-work-group"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-non-uniform-work-group"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="pipes"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-pipes"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="profiling"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-profiling"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="relationals"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-relationals"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="subgroups"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-subgroups"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="svm"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-svm"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="thread-dimensions"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-thread-dimensions"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="vectors"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-vectors"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="clcopyimage"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-image-clcopyimage"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="clfillimage"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-image-clfillimage"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="clgetinfo"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-image-clgetinfo"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="clreadwriteimage"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-image-clreadwriteimage"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="kernel-image-methods"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-image-kernel-image-methods"/>
-    </test>
-    <test class="">
-        <option name="par-file-name" value="opencl_cts"/>
-        <option name="inject-android-serial" value="true"/>
-        <option name="test-timeout" value="30m"/>
-        <option name="python-options" value="samplerlessreads"/>
-        <option name="python-options" value="/data/nativetest64/unrestricted/ocl-test-image-samplerlessreads"/>
-    </test>
diff --git a/scripts/tests.json b/scripts/tests.json
deleted file mode 100644
index 769c1d7..0000000
--- a/scripts/tests.json
+++ /dev/null
@@ -1,325 +0,0 @@
-    {
-        "test_name": "allocations",
-        "binary_name": "ocl-test-allocations",
-        "srcs": [
-            "test_conformance/allocations/*.cpp"
-        ]
-    },
-    {
-        "test_name": "api",
-        "binary_name": "ocl-test-api",
-        "srcs": [
-            "test_conformance/api/*.cpp"
-        ]
-    },
-    {
-        "test_name": "atomics",
-        "binary_name": "ocl-test-atomics",
-        "srcs": [
-            "test_conformance/atomics/*.cpp"
-        ]
-    },
-    {
-        "test_name": "basic",
-        "binary_name": "ocl-test-basic",
-        "srcs": [
-            "test_conformance/basic/*.cpp"
-        ]
-    },
-    {
-        "test_name": "buffers",
-        "binary_name": "ocl-test-buffers",
-        "srcs": [
-            "test_conformance/buffers/*.cpp"
-        ]
-    },
-    {
-        "test_name": "c11-atomics",
-        "binary_name": "ocl-test-c11-atomics",
-        "srcs": [
-            "test_conformance/c11_atomics/*.cpp"
-        ],
-        "timeout": "120m"
-    },
-    {
-        "test_name": "commonfns",
-        "binary_name": "ocl-test-commonfns",
-        "srcs": [
-            "test_conformance/commonfns/*.cpp"
-        ]
-    },
-    {
-        "test_name": "compiler",
-        "binary_name": "ocl-test-compiler",
-        "srcs": [
-            "test_conformance/compiler/*.cpp"
-        ],
-        "data": [
-            "test_conformance/compiler/includeTestDirectory/testIncludeFile.h",
-            "test_conformance/compiler/secondIncludeTestDirectory/testIncludeFile.h"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "computeinfo",
-        "binary_name": "ocl-test-computeinfo",
-        "srcs": [
-            "test_conformance/computeinfo/*.cpp"
-        ]
-    },
-    {
-        "test_name": "contractions",
-        "binary_name": "ocl-test-contractions",
-        "srcs": [
-            "test_conformance/contractions/*.cpp"
-        ]
-    },
-    {
-        "test_name": "conversions",
-        "binary_name": "ocl-test-conversions",
-        "srcs": [
-            "test_conformance/conversions/*.cpp"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "device-execution",
-        "binary_name": "ocl-test-device-execution",
-        "srcs": [
-            "test_conformance/device_execution/*.cpp"
-        ]
-    },
-    {
-        "test_name": "device-partition",
-        "binary_name": "ocl-test-device-partition",
-        "srcs": [
-            "test_conformance/device_partition/*.cpp"
-        ]
-    },
-    {
-        "test_name": "device-timer",
-        "binary_name": "ocl-test-device-timer",
-        "srcs": [
-            "test_conformance/device_timer/*.cpp"
-        ]
-    },
-    {
-        "test_name": "events",
-        "binary_name": "ocl-test-events",
-        "srcs": [
-            "test_conformance/events/*.cpp"
-        ]
-    },
-    {
-        "test_name": "generic-address-space",
-        "binary_name": "ocl-test-generic-address-space",
-        "srcs": [
-            "test_conformance/generic_address_space/*.cpp"
-        ]
-    },
-    {
-        "test_name": "geometrics",
-        "binary_name": "ocl-test-geometrics",
-        "srcs": [
-            "test_conformance/geometrics/*.cpp"
-        ]
-    },
-    {
-        "test_name": "half",
-        "binary_name": "ocl-test-half",
-        "srcs": [
-            "test_conformance/half/*.cpp"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "integer-ops",
-        "binary_name": "ocl-test-integer-ops",
-        "srcs": [
-            "test_conformance/integer_ops/*.cpp"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "math-brute-force",
-        "binary_name": "ocl-test-math-brute-force",
-        "srcs": [
-            "test_conformance/math_brute_force/*.cpp"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "mem-host-flags",
-        "binary_name": "ocl-test-mem-host-flags",
-        "srcs": [
-            "test_conformance/mem_host_flags/*.cpp"
-        ]
-    },
-    {
-        "test_name": "multiple-device-context",
-        "binary_name": "ocl-test-multiple-device-context",
-        "srcs": [
-            "test_conformance/multiple_device_context/*.cpp"
-        ]
-    },
-    {
-        "test_name": "non-uniform-work-group",
-        "binary_name": "ocl-test-non-uniform-work-group",
-        "srcs": [
-            "test_conformance/non_uniform_work_group/*.cpp"
-        ]
-    },
-    {
-        "test_name": "pipes",
-        "binary_name": "ocl-test-pipes",
-        "srcs": [
-            "test_conformance/pipes/*.cpp"
-        ]
-    },
-    {
-        "test_name": "printf",
-        "binary_name": "ocl-test-printf",
-        "srcs": [
-            "test_conformance/printf/*.cpp"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "profiling",
-        "binary_name": "ocl-test-profiling",
-        "srcs": [
-            "test_conformance/profiling/*.cpp"
-        ]
-    },
-    {
-        "test_name": "relationals",
-        "binary_name": "ocl-test-relationals",
-        "srcs": [
-            "test_conformance/relationals/*.cpp"
-        ]
-    },
-    {
-        "test_name": "select",
-        "binary_name": "ocl-test-select",
-        "srcs": [
-            "test_conformance/select/*.cpp"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "spir",
-        "binary_name": "ocl-test-spir",
-        "srcs": [
-            "test_conformance/spir/*.cpp",
-            "test_conformance/math_brute_force/FunctionList.cpp",
-            "test_common/miniz/miniz.c"
-        ],
-        "data": [ "test_conformance/spir/*.zip" ],
-        "cflags": [
-            "-Wno-unused-private-field"
-        ],
-        "rtti": true,
-        "manual_only": true
-    },
-    {
-        "test_name": "spirv-new",
-        "binary_name": "ocl-test-spirv-new",
-        "srcs": [
-            "test_conformance/spirv_new/*.cpp",
-            "test_conformance/math_brute_force/reference_math.cpp",
-            "test_conformance/math_brute_force/utility.cpp"
-        ],
-        "data": [
-            "test_conformance/spirv_new/spirv_asm/*",
-            "test_conformance/spirv_new/spirv_bin/*"
-        ],
-        "manual_only": true
-    },
-    {
-        "test_name": "subgroups",
-        "binary_name": "ocl-test-subgroups",
-        "srcs": [
-            "test_conformance/subgroups/*.cpp"
-        ]
-    },
-    {
-        "test_name": "svm",
-        "binary_name": "ocl-test-svm",
-        "srcs": [
-            "test_conformance/SVM/*.cpp"
-        ]
-    },
-    {
-        "test_name": "thread-dimensions",
-        "binary_name": "ocl-test-thread-dimensions",
-        "srcs": [
-            "test_conformance/thread_dimensions/*.cpp"
-        ]
-    },
-    {
-        "test_name": "vectors",
-        "binary_name": "ocl-test-vectors",
-        "srcs": [
-            "test_conformance/vectors/*.cpp"
-        ]
-    },
-    {
-        "test_name": "clcopyimage",
-        "binary_name": "ocl-test-image-clcopyimage",
-        "srcs": [
-            "test_conformance/images/clCopyImage/*.cpp"
-        ],
-        "image_type": true
-    },
-    {
-        "test_name": "clfillimage",
-        "binary_name": "ocl-test-image-clfillimage",
-        "srcs": [
-            "test_conformance/images/clFillImage/*.cpp"
-        ],
-        "image_type": true
-    },
-    {
-        "test_name": "clgetinfo",
-        "binary_name": "ocl-test-image-clgetinfo",
-        "srcs": [
-            "test_conformance/images/clGetInfo/*.cpp"
-        ],
-        "image_type": true
-    },
-    {
-        "test_name": "clreadwriteimage",
-        "binary_name": "ocl-test-image-clreadwriteimage",
-        "srcs": [
-            "test_conformance/images/clReadWriteImage/*.cpp"
-        ],
-        "image_type": true
-    },
-    {
-        "test_name": "kernel-image-methods",
-        "binary_name": "ocl-test-image-kernel-image-methods",
-        "srcs": [
-            "test_conformance/images/kernel_image_methods/*.cpp"
-        ],
-        "image_type": true
-    },
-    {
-        "test_name": "kernel-read-write",
-        "binary_name": "ocl-test-image-kernel-read-write",
-        "srcs": [
-            "test_conformance/images/kernel_read_write/*.cpp"
-        ],
-        "manual_only": true,
-        "image_type": true
-    },
-    {
-        "test_name": "samplerlessreads",
-        "binary_name": "ocl-test-image-samplerlessreads",
-        "srcs": [
-            "test_conformance/images/samplerlessReads/*.cpp"
-        ],
-        "image_type": true
-    }
diff --git a/test_common/CMakeLists.txt b/test_common/CMakeLists.txt
index 2d4bc19..e890a67 100644
--- a/test_common/CMakeLists.txt
+++ b/test_common/CMakeLists.txt
@@ -8,14 +8,12 @@
-    harness/featureHelpers.cpp
-    harness/propertyHelpers.cpp
diff --git a/test_common/gl/helpers.cpp b/test_common/gl/helpers.cpp
index def78d7..c02a4b1 100644
--- a/test_common/gl/helpers.cpp
+++ b/test_common/gl/helpers.cpp
@@ -14,7 +14,11 @@
 // limitations under the License.
 #include "helpers.h"
-#include "harness/imageHelpers.h"
+#include "../harness/imageHelpers.h"
+// convert_float_to_half and convert_half_to_float may be found in test_conformance/images/image_helpers.cpp
+cl_ushort convert_float_to_half( cl_float f );
+cl_float  convert_half_to_float( cl_ushort h );
 #if defined( __APPLE__ )
     #include <OpenGL/glu.h>
@@ -1277,7 +1281,9 @@
         case kUInt:
         *((unsigned int*)p) = val*0xffffffff;
-        case kHalf: *((cl_half *)p) = convert_float_to_half(val); break;
+        case kHalf:
+          *((cl_ushort*)p) = convert_float_to_half(val);
+          break;
           log_error("Test error: unexpected type enum 0x%x\n",type);
@@ -1539,7 +1545,9 @@
         case kUInt:
           *((unsigned int*)p) = val*0xffffffff;
-        case kHalf: *((cl_half *)p) = convert_float_to_half(val); break;
+        case kHalf:
+          *((cl_ushort*)p) = convert_float_to_half(val);
+          break;
           log_error("Test error: unexpected type enum 0x%x\n",type);
diff --git a/test_common/gles/gl_headers.h b/test_common/gles/gl_headers.h
index e0d4632..849da71 100644
--- a/test_common/gles/gl_headers.h
+++ b/test_common/gles/gl_headers.h
@@ -24,12 +24,10 @@
 #include <GLES3/gl3.h>
 #include <GLES2/gl2.h>
-#define glTexImage3DOES glTexImage3D
-#define glUnmapBufferOES glUnmapBuffer
-#define glMapBufferRangeEXT glMapBufferRange
 #include <GLES2/gl2ext.h>
+#include <GLES2/gl2extQCOM.h>
 // Some macros to minimize the changes in the tests from GL to GLES2
 #define glGenRenderbuffersEXT        glGenRenderbuffers
@@ -42,26 +40,21 @@
 #define glDeleteFramebuffersEXT      glDeleteFramebuffers
 #define glBindFramebufferEXT         glBindFramebuffer
 #define glFramebufferRenderbufferEXT glFramebufferRenderbuffer
-#ifndef GL_ES_VERSION_3_0
-#define GL_RGBA32F GL_RGBA32F_EXT
+#define glTexImage3D                 glTexImage3DOES
 #define glutGetProcAddress           eglGetProcAddress
+#define GL_TEXTURE_3D                GL_TEXTURE_3D_OES
+#define GL_READ_ONLY                 GL_BUFFER_ACCESS_OES
+#define GL_BGRA                      GL_BGRA_EXT
 #define GL_RGBA32F_ARB               GL_RGBA
 typedef unsigned short GLhalf;
diff --git a/test_common/gles/helpers.cpp b/test_common/gles/helpers.cpp
index 34f40b4..188f903 100644
--- a/test_common/gles/helpers.cpp
+++ b/test_common/gles/helpers.cpp
@@ -16,7 +16,6 @@
 #include "helpers.h"
 #include "gl_headers.h"
-#include "CL/cl_half.h"
 #define CHECK_ERROR()\
     {GLint __error = glGetError(); if(__error) {log_error( "GL ERROR: %s!\n", gluErrorString( err ));}}
@@ -962,6 +961,49 @@
+cl_ushort float2half_rte( float f )
+    union{ float f; cl_uint u; } u = {f};
+    cl_uint sign = (u.u >> 16) & 0x8000;
+    float x = fabsf(f);
+    //Nan
+    if( x != x )
+    {
+        u.u >>= (24-11);
+        u.u &= 0x7fff;
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+    // overflow
+    if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) )
+        return 0x7c00 | sign;
+    // underflow
+    if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) )
+        return sign;    // The halfway case can return 0x0001 or 0. 0 is even.
+    // very small
+    if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
+        return sign | 1;
+    // half denormal
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125);
+        return sign | u.u;
+    }
+    u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13);
+    u.u &= 0x7f800000;
+    x += u.f;
+    u.f = x - u.f;
+    u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);
+    return (u.u >> (24-11)) | sign;
 void* CreateRandomData( ExplicitType type, size_t count, MTdata d )
@@ -1058,8 +1100,7 @@
             for( size_t i = 0; i < count; i++ )
-                p[i] = cl_half_from_float(get_random_float(0.f, 1.f, d),
-                                          CL_HALF_RTE);
+                p[ i ] = float2half_rte(get_random_float( 0.f, 1.f, d ));
             return (void*)p;
diff --git a/test_common/harness/ThreadPool.cpp b/test_common/harness/ThreadPool.cpp
index 31985aa..c329452 100644
--- a/test_common/harness/ThreadPool.cpp
+++ b/test_common/harness/ThreadPool.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -19,10 +19,9 @@
 #include <stdio.h>
 #include <stdlib.h>
-#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
-// or any other POSIX system
+#if  defined( __APPLE__ ) || defined( __linux__ ) || defined( _WIN32 )  // or any other POSIX system
-#if defined(_WIN32)
+#if defined( _WIN32 )
 #include <windows.h>
 #if defined(_MSC_VER)
 #include <intrin.h>
@@ -39,89 +38,84 @@
 #endif // !_WIN32
 // declarations
-#ifdef _WIN32
-void ThreadPool_WorkerFunc(void *p);
+#ifdef  _WIN32
+void ThreadPool_WorkerFunc( void *p );
-void *ThreadPool_WorkerFunc(void *p);
+void *ThreadPool_WorkerFunc( void *p );
 void ThreadPool_Init(void);
 void ThreadPool_Exit(void);
-#if defined(__MINGW32__)
-// Mutex for implementing super heavy atomic operations if you don't have GCC or
-// MSVC
-#elif defined(__GNUC__) || defined(_MSC_VER)
+#if defined (__MINGW32__)
+    // Mutex for implementing super heavy atomic operations if you don't have GCC or MSVC
+    CRITICAL_SECTION     gAtomicLock;
+#elif defined( __GNUC__ ) || defined( _MSC_VER)
-pthread_mutex_t gAtomicLock;
+    pthread_mutex_t     gAtomicLock;
-// Atomic add operator with mem barrier.  Mem barrier needed to protect state
-// modified by the worker functions.
-cl_int ThreadPool_AtomicAdd(volatile cl_int *a, cl_int b)
+// Atomic add operator with mem barrier.  Mem barrier needed to protect state modified by the worker functions.
+cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
-#if defined(__MINGW32__)
+#if defined (__MINGW32__)
     // No atomics on Mingw32
     cl_int old = *a;
     *a = old + b;
     return old;
-#elif defined(__GNUC__)
-    // GCC extension:
-    //
-    return __sync_fetch_and_add(a, b);
-    // do we need __sync_synchronize() here, too?  GCC docs are unclear whether
-    // __sync_fetch_and_add does a synchronize
-#elif defined(_MSC_VER)
-    return (cl_int)_InterlockedExchangeAdd((volatile LONG *)a, (LONG)b);
+#elif defined( __GNUC__ )
+    // GCC extension:
+    return __sync_fetch_and_add( a, b );
+    // do we need __sync_synchronize() here, too?  GCC docs are unclear whether __sync_fetch_and_add does a synchronize
+#elif defined( _MSC_VER )
+    return (cl_int) _InterlockedExchangeAdd( (volatile LONG*) a, (LONG) b );
-#warning Please add a atomic add implementation here, with memory barrier.  Fallback code is slow.
-    if (pthread_mutex_lock(&gAtomicLock))
-        log_error("Atomic operation failed. pthread_mutex_lock(&gAtomicLock) "
-                  "returned an error\n");
+    #warning  Please add a atomic add implementation here, with memory barrier.  Fallback code is slow.
+    if( pthread_mutex_lock(&gAtomicLock) )
+        log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
     cl_int old = *a;
     *a = old + b;
-    if (pthread_mutex_unlock(&gAtomicLock))
-        log_error("Failed to release gAtomicLock. Further atomic operations "
-                  "may deadlock!\n");
+    if( pthread_mutex_unlock(&gAtomicLock) )
+        log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock!\n");
     return old;
-#if defined(_WIN32)
+#if defined( _WIN32 )
 // Uncomment the following line if Windows XP support is not required.
+#define _INIT_ONCE           INIT_ONCE
+#define _PINIT_ONCE          PINIT_ONCE
 #define _InitOnceExecuteOnce InitOnceExecuteOnce
 typedef volatile LONG _INIT_ONCE;
-#define _INIT_ONCE_DONE 2
+#define _INIT_ONCE_DONE          2
-static BOOL _InitOnceExecuteOnce(_PINIT_ONCE InitOnce, _PINIT_ONCE_FN InitFn,
-                                 PVOID Parameter, LPVOID *Context)
+static BOOL _InitOnceExecuteOnce(
+  _PINIT_ONCE InitOnce,
+  PVOID Parameter,
+  LPVOID *Context
-    while (*InitOnce != _INIT_ONCE_DONE)
+    while ( *InitOnce != _INIT_ONCE_DONE )
-        if (*InitOnce != _INIT_ONCE_IN_PROGRESS
-            && _InterlockedCompareExchange(InitOnce, _INIT_ONCE_IN_PROGRESS,
-                                           _INIT_ONCE_UNINITIALIZED)
-                == _INIT_ONCE_UNINITIALIZED)
+        if (*InitOnce != _INIT_ONCE_IN_PROGRESS && _InterlockedCompareExchange( InitOnce, _INIT_ONCE_IN_PROGRESS, _INIT_ONCE_UNINITIALIZED ) == _INIT_ONCE_UNINITIALIZED )
-            InitFn(InitOnce, Parameter, Context);
+            InitFn( InitOnce, Parameter, Context );
             *InitOnce = _INIT_ONCE_DONE;
             return TRUE;
-        Sleep(1);
+        Sleep( 1 );
     return TRUE;
@@ -131,352 +125,312 @@
 #define _InitializeConditionVariable InitializeConditionVariable
-#define _SleepConditionVariableCS SleepConditionVariableCS
-#define _WakeAllConditionVariable WakeAllConditionVariable
+#define _SleepConditionVariableCS    SleepConditionVariableCS
+#define _WakeAllConditionVariable    WakeAllConditionVariable
 typedef struct
-    HANDLE mEvent; // Used to park the thread.
-    // Used to protect mWaiters, mGeneration and mReleaseCount:
-    volatile cl_int mWaiters; // Number of threads waiting on this cond var.
-    volatile cl_int mGeneration; // Wait generation count.
-    volatile cl_int mReleaseCount; // Number of releases to execute before
-                                   // reseting the event.
+    HANDLE           mEvent; // Used to park the thread.
+    CRITICAL_SECTION mLock[1]; // Used to protect mWaiters, mGeneration and mReleaseCount.
+    volatile cl_int  mWaiters; // Number of threads waiting on this cond var.
+    volatile cl_int  mGeneration; // Wait generation count.
+    volatile cl_int  mReleaseCount; // Number of releases to execute before reseting the event.
-static void _InitializeConditionVariable(_PCONDITION_VARIABLE cond_var)
+static void _InitializeConditionVariable( _PCONDITION_VARIABLE cond_var )
-    cond_var->mEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
-    InitializeCriticalSection(cond_var->mLock);
+    cond_var->mEvent = CreateEvent( NULL, TRUE, FALSE, NULL );
+    InitializeCriticalSection( cond_var->mLock );
     cond_var->mWaiters = 0;
     cond_var->mGeneration = 0;
-#if !defined(NDEBUG)
+#if !defined ( NDEBUG )
     cond_var->mReleaseCount = 0;
 #endif // !NDEBUG
-static void _SleepConditionVariableCS(_PCONDITION_VARIABLE cond_var,
-                                      PCRITICAL_SECTION cond_lock,
-                                      DWORD ignored)
+static void _SleepConditionVariableCS( _PCONDITION_VARIABLE cond_var, PCRITICAL_SECTION cond_lock, DWORD ignored)
-    EnterCriticalSection(cond_var->mLock);
+    EnterCriticalSection( cond_var->mLock );
     cl_int generation = cond_var->mGeneration;
-    LeaveCriticalSection(cond_var->mLock);
-    LeaveCriticalSection(cond_lock);
+    LeaveCriticalSection( cond_var->mLock );
+    LeaveCriticalSection( cond_lock );
-    while (TRUE)
+    while ( TRUE )
-        WaitForSingleObject(cond_var->mEvent, INFINITE);
-        EnterCriticalSection(cond_var->mLock);
-        BOOL done =
-            cond_var->mReleaseCount > 0 && cond_var->mGeneration != generation;
-        LeaveCriticalSection(cond_var->mLock);
-        if (done)
+        WaitForSingleObject( cond_var->mEvent, INFINITE );
+        EnterCriticalSection( cond_var->mLock );
+        BOOL done = cond_var->mReleaseCount > 0 && cond_var->mGeneration != generation;
+        LeaveCriticalSection( cond_var->mLock );
+        if ( done )
-    EnterCriticalSection(cond_lock);
-    EnterCriticalSection(cond_var->mLock);
-    if (--cond_var->mReleaseCount == 0)
+    EnterCriticalSection( cond_lock );
+    EnterCriticalSection( cond_var->mLock );
+    if ( --cond_var->mReleaseCount == 0 )
-        ResetEvent(cond_var->mEvent);
+        ResetEvent( cond_var->mEvent );
-    LeaveCriticalSection(cond_var->mLock);
+    LeaveCriticalSection( cond_var->mLock );
-static void _WakeAllConditionVariable(_PCONDITION_VARIABLE cond_var)
+static void _WakeAllConditionVariable( _PCONDITION_VARIABLE cond_var )
-    EnterCriticalSection(cond_var->mLock);
-    if (cond_var->mWaiters > 0)
+    EnterCriticalSection( cond_var->mLock );
+    if (cond_var->mWaiters > 0 )
         cond_var->mReleaseCount = cond_var->mWaiters;
-        SetEvent(cond_var->mEvent);
+        SetEvent( cond_var->mEvent );
-    LeaveCriticalSection(cond_var->mLock);
+    LeaveCriticalSection( cond_var->mLock );
 #endif // _WIN32
-#define MAX_COUNT (1 << 29)
+#define MAX_COUNT   (1<<29)
-// Global state to coordinate whether the threads have been launched
-// successfully or not
-#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
+// Global state to coordinate whether the threads have been launched successfully or not
+#if defined( _MSC_VER ) && (_WIN32_WINNT >= 0x600)
 static _INIT_ONCE threadpool_init_control;
-#elif defined(_WIN32) // MingW of XP
+#elif defined (_WIN32)  // MingW of XP
 static int threadpool_init_control;
 #else // Posix platforms
 pthread_once_t threadpool_init_control = PTHREAD_ONCE_INIT;
-cl_int threadPoolInitErr = -1; // set to CL_SUCCESS on successful thread launch
+cl_int threadPoolInitErr = -1;          // set to CL_SUCCESS on successful thread launch
-// critical region lock around ThreadPool_Do.  We can only run one ThreadPool_Do
-// at a time, because we are too lazy to set up a queue here, and don't expect
-// to need one.
-#if defined(_WIN32)
-CRITICAL_SECTION gThreadPoolLock[1];
+// critical region lock around ThreadPool_Do.  We can only run one ThreadPool_Do at a time,
+// because we are too lazy to set up a queue here, and don't expect to need one.
+#if defined( _WIN32 )
+CRITICAL_SECTION    gThreadPoolLock[1];
 #else // !_WIN32
-pthread_mutex_t gThreadPoolLock;
+pthread_mutex_t     gThreadPoolLock;
 #endif // !_WIN32
 // Condition variable to park ThreadPool threads when not working
-#if defined(_WIN32)
-CRITICAL_SECTION cond_lock[1];
+#if defined( _WIN32 )
+CRITICAL_SECTION    cond_lock[1];
 #else // !_WIN32
-pthread_mutex_t cond_lock;
-pthread_cond_t cond_var;
+pthread_mutex_t     cond_lock;
+pthread_cond_t      cond_var;
 #endif // !_WIN32
-// Condition variable state. How many iterations on the function left to run,
-// set to CL_INT_MAX to cause worker threads to exit. Note: this value might
-// go negative.
-volatile cl_int gRunCount = 0;
+volatile cl_int     gRunCount = 0;              // Condition variable state. How many iterations on the function left to run.
+                                                // set to CL_INT_MAX to cause worker threads to exit. Note: this value might go negative.
 // State that only changes when the threadpool is not working.
-volatile TPFuncPtr gFunc_ptr = NULL;
-volatile void *gUserInfo = NULL;
-volatile cl_int gJobCount = 0;
+volatile TPFuncPtr  gFunc_ptr = NULL;
+volatile void       *gUserInfo = NULL;
+volatile cl_int     gJobCount = 0;
 // State that may change while the thread pool is working
-volatile cl_int jobError = CL_SUCCESS; // err code return for the job as a whole
+volatile cl_int     jobError = CL_SUCCESS;      // err code return for the job as a whole
 // Condition variable to park caller while waiting
-#if defined(_WIN32)
-HANDLE caller_event;
+#if defined( _WIN32 )
+HANDLE              caller_event;
 #else // !_WIN32
-pthread_mutex_t caller_cond_lock;
-pthread_cond_t caller_cond_var;
+pthread_mutex_t     caller_cond_lock;
+pthread_cond_t      caller_cond_var;
 #endif // !_WIN32
-// # of threads intended to be running. Running threads will decrement this
-// as they discover they've run out of work to do.
-volatile cl_int gRunning = 0;
+volatile cl_int     gRunning = 0;       // # of threads intended to be running. Running threads will decrement this as they discover they've run out of work to do.
 // The total number of threads launched.
-volatile cl_int gThreadCount = 0;
+volatile cl_int     gThreadCount = 0;
 #ifdef _WIN32
-void ThreadPool_WorkerFunc(void *p)
+void ThreadPool_WorkerFunc( void *p )
-void *ThreadPool_WorkerFunc(void *p)
+void *ThreadPool_WorkerFunc( void *p )
-    cl_uint threadID = ThreadPool_AtomicAdd((volatile cl_int *)p, 1);
-    cl_int item = ThreadPool_AtomicAdd(&gRunCount, -1);
-    // log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
+    cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 );
+    cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 );
+//    log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
-    while (MAX_COUNT > item)
+    while( MAX_COUNT > item )
         cl_int err;
         // check for more work to do
-        if (0 >= item)
+        if( 0 >= item )
-            // log_info("Thread %d has run out of work.\n", threadID);
+//            log_info( "Thread %d has run out of work.\n", threadID );
             // No work to do. Attempt to block waiting for work
-#if defined(_WIN32)
-            EnterCriticalSection(cond_lock);
+#if defined( _WIN32 )
+            EnterCriticalSection( cond_lock );
 #else // !_WIN32
-            if ((err = pthread_mutex_lock(&cond_lock)))
+            if((err = pthread_mutex_lock( &cond_lock) ))
-                log_error(
-                    "Error %d from pthread_mutex_lock. Worker %d unable to "
-                    "block waiting for work. ThreadPool_WorkerFunc failed.\n",
-                    err, threadID);
+                log_error("Error %d from pthread_mutex_lock. Worker %d unable to block waiting for work. ThreadPool_WorkerFunc failed.\n", err, threadID );
                 goto exit;
 #endif // !_WIN32
-            cl_int remaining = ThreadPool_AtomicAdd(&gRunning, -1);
-            // log_info("ThreadPool_WorkerFunc: gRunning = %d\n",
-            //          remaining - 1);
-            if (1 == remaining)
+            cl_int remaining = ThreadPool_AtomicAdd( &gRunning, -1 );
+//            log_info( "ThreadPool_WorkerFunc: gRunning = %d\n", remaining - 1 );
+            if( 1 == remaining )
             { // last thread out signal the main thread to wake up
-#if defined(_WIN32)
-                SetEvent(caller_event);
+#if defined( _WIN32 )
+                SetEvent( caller_event );
 #else // !_WIN32
-                if ((err = pthread_mutex_lock(&caller_cond_lock)))
+                if((err = pthread_mutex_lock( &caller_cond_lock) ))
-                    log_error("Error %d from pthread_mutex_lock. Unable to "
-                              "wake caller.\n",
-                              err);
+                    log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
                     goto exit;
-                if ((err = pthread_cond_broadcast(&caller_cond_var)))
+                if( (err = pthread_cond_broadcast( &caller_cond_var )))
-                    log_error(
-                        "Error %d from pthread_cond_broadcast. Unable to wake "
-                        "up main thread. ThreadPool_WorkerFunc failed.\n",
-                        err);
+                    log_error("Error %d from pthread_cond_broadcast. Unable to wake up main thread. ThreadPool_WorkerFunc failed.\n", err );
                     goto exit;
-                if ((err = pthread_mutex_unlock(&caller_cond_lock)))
+                if((err = pthread_mutex_unlock( &caller_cond_lock) ))
-                    log_error("Error %d from pthread_mutex_lock. Unable to "
-                              "wake caller.\n",
-                              err);
+                    log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
                     goto exit;
 #endif // !_WIN32
-            // loop in case we are woken only to discover that some other thread
-            // already did all the work
-            while (0 >= item)
+            // loop in case we are woken only to discover that some other thread already did all the work
+            while( 0 >= item )
-#if defined(_WIN32)
-                _SleepConditionVariableCS(cond_var, cond_lock, INFINITE);
+#if defined( _WIN32 )
+                _SleepConditionVariableCS( cond_var, cond_lock, INFINITE );
 #else // !_WIN32
-                if ((err = pthread_cond_wait(&cond_var, &cond_lock)))
+                if((err = pthread_cond_wait( &cond_var, &cond_lock) ))
-                    log_error(
-                        "Error %d from pthread_cond_wait. Unable to block for "
-                        "waiting for work. ThreadPool_WorkerFunc failed.\n",
-                        err);
-                    pthread_mutex_unlock(&cond_lock);
+                    log_error("Error %d from pthread_cond_wait. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
+                    pthread_mutex_unlock( &cond_lock);
                     goto exit;
 #endif // !_WIN32
                 // try again to get a valid item id
-                item = ThreadPool_AtomicAdd(&gRunCount, -1);
-                if (MAX_COUNT <= item) // exit if we are done
+                item = ThreadPool_AtomicAdd( &gRunCount, -1 );
+                if( MAX_COUNT <= item )  // exit if we are done
-#if defined(_WIN32)
-                    LeaveCriticalSection(cond_lock);
+#if defined( _WIN32 )
+                    LeaveCriticalSection( cond_lock );
 #else // !_WIN32
-                    pthread_mutex_unlock(&cond_lock);
+                    pthread_mutex_unlock( &cond_lock);
 #endif // !_WIN32
                     goto exit;
-            ThreadPool_AtomicAdd(&gRunning, 1);
-            // log_info("Thread %d has found work.\n", threadID);
+            ThreadPool_AtomicAdd( &gRunning, 1 );
+//            log_info( "Thread %d has found work.\n", threadID);
-#if defined(_WIN32)
-            LeaveCriticalSection(cond_lock);
+#if defined( _WIN32 )
+            LeaveCriticalSection( cond_lock );
 #else // !_WIN32
-            if ((err = pthread_mutex_unlock(&cond_lock)))
+            if((err = pthread_mutex_unlock( &cond_lock) ))
-                log_error(
-                    "Error %d from pthread_mutex_unlock. Unable to block for "
-                    "waiting for work. ThreadPool_WorkerFunc failed.\n",
-                    err);
+                log_error("Error %d from pthread_mutex_unlock. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
                 goto exit;
 #endif // !_WIN32
         // we have a valid item, so do the work
-        // but only if we haven't already encountered an error
-        if (CL_SUCCESS == jobError)
+        if( CL_SUCCESS == jobError )  // but only if we haven't already encountered an error
-            // log_info("Thread %d doing job %d\n", threadID, item - 1);
+//            log_info( "Thread %d doing job %d\n", threadID, item - 1);
 #if defined(__APPLE__) && defined(__arm__)
-            // On most platforms which support denorm, default is FTZ off.
-            // However, on some hardware where the reference is computed,
-            // default might be flush denorms to zero e.g. arm. This creates
-            // issues in result verification. Since spec allows the
-            // implementation to either flush or not flush denorms to zero, an
-            // implementation may choose not be flush i.e. return denorm result
-            // whereas reference result may be zero (flushed denorm). Hence we
-            // need to disable denorm flushing on host side where reference is
-            // being computed to make sure we get non-flushed reference result.
-            // If implementation returns flushed result, we correctly take care
-            // of that in verification code.
+            // On most platforms which support denorm, default is FTZ off. However,
+            // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
+            // This creates issues in result verification. Since spec allows the implementation to either flush or
+            // not flush denorms to zero, an implementation may choose not to flush, i.e. return a denorm result, whereas
+            // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
+            // where reference is being computed to make sure we get non-flushed reference result. If implementation
+            // returns flushed result, we correctly take care of that in verification code.
             FPU_mode_type oldMode;
-            DisableFTZ(&oldMode);
+            DisableFTZ( &oldMode );
             // Call the user's function with this item ID
-            err = gFunc_ptr(item - 1, threadID, (void *)gUserInfo);
+            err = gFunc_ptr( item - 1, threadID, (void*) gUserInfo );
 #if defined(__APPLE__) && defined(__arm__)
             // Restore FP state
-            RestoreFPState(&oldMode);
+            RestoreFPState( &oldMode );
-            if (err)
+            if( err )
 #if (__MINGW32__)
-                if (jobError == CL_SUCCESS) jobError = err;
+                if( jobError == CL_SUCCESS );
+                    jobError = err;
                 gRunCount = 0;
-#elif defined(__GNUC__)
-                // GCC extension:
-                //
+#elif defined( __GNUC__ )
+                // GCC extension:
                 // set the new error if we are the first one there.
-                __sync_val_compare_and_swap(&jobError, CL_SUCCESS, err);
+                __sync_val_compare_and_swap( &jobError, CL_SUCCESS, err );
                 // drop run count to 0
                 gRunCount = 0;
-#elif defined(_MSC_VER)
+#elif defined( _MSC_VER )
                 // set the new error if we are the first one there.
-                _InterlockedCompareExchange((volatile LONG *)&jobError, err,
-                                            CL_SUCCESS);
+                _InterlockedCompareExchange( (volatile LONG*) &jobError, err, CL_SUCCESS );
                 // drop run count to 0
                 gRunCount = 0;
-                if (pthread_mutex_lock(&gAtomicLock))
-                    log_error(
-                        "Atomic operation failed. "
-                        "pthread_mutex_lock(&gAtomicLock) returned an error\n");
-                if (jobError == CL_SUCCESS) jobError = err;
+                if( pthread_mutex_lock(&gAtomicLock) )
+                    log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
+                if( jobError == CL_SUCCESS );
+                    jobError = err;
                 gRunCount = 0;
-                if (pthread_mutex_unlock(&gAtomicLock))
-                    log_error("Failed to release gAtomicLock. Further atomic "
-                              "operations may deadlock\n");
+                if( pthread_mutex_unlock(&gAtomicLock) )
+                    log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock\n");
         // get the next item
-        item = ThreadPool_AtomicAdd(&gRunCount, -1);
+        item = ThreadPool_AtomicAdd( &gRunCount, -1 );
-    log_info("ThreadPool: thread %d exiting.\n", threadID);
-    ThreadPool_AtomicAdd(&gThreadCount, -1);
+    log_info( "ThreadPool: thread %d exiting.\n", threadID );
+    ThreadPool_AtomicAdd( &gThreadCount, -1 );
 #if !defined(_WIN32)
     return NULL;
 // SetThreadCount() may be used to artifically set the number of worker threads
-// If the value is 0 (the default) the number of threads will be determined
-// based on the number of CPU cores.  If it is a unicore machine, then 2 will be
-// used, so that we still get some testing for thread safety.
+// If the value is 0 (the default) the number of threads will be determined based on
+// the number of CPU cores.  If it is a unicore machine, then 2 will be used, so
+// that we still get some testing for thread safety.
-// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then
-// the code will run single threaded, but will report an error to indicate that
-// the test is invalid.  This option is intended for debugging purposes only. It
-// is suggested as a convention that test apps set the thread count to 1 in
-// response to the -m flag.
+// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
+// code will run single threaded, but will report an error to indicate that the test
+// is invalid.  This option is intended for debugging purposes only. It is suggested
+// as a convention that test apps set the thread count to 1 in response to the -m flag.
-// SetThreadCount() must be called before the first call to GetThreadCount() or
-// ThreadPool_Do(), otherwise the behavior is indefined.
-void SetThreadCount(int count)
+// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
+// otherwise the behavior is undefined.
+void        SetThreadCount( int count )
-    if (threadPoolInitErr == CL_SUCCESS)
+    if( threadPoolInitErr == CL_SUCCESS )
-        log_error("Error: It is illegal to set the thread count after the "
-                  "first call to ThreadPool_Do or GetThreadCount\n");
+        log_error( "Error: It is illegal to set the thread count after the first call to ThreadPool_Do or GetThreadCount\n" );
@@ -489,42 +443,35 @@
     int err;
     volatile cl_uint threadID = 0;
-    // Check for manual override of multithreading code. We add this for better
-    // debuggability.
-    if (getenv("CL_TEST_SINGLE_THREADED"))
+    // Check for manual override of multithreading code. We add this for better debuggability.
+    if( getenv( "CL_TEST_SINGLE_THREADED" ) )
-        log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. "
-                  "Running single threaded.\n*** TEST IS INVALID! ***\n");
+        log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n*** TEST IS INVALID! ***\n");
         gThreadCount = 1;
-    // Figure out how many threads to run -- check first for non-zero to give
-    // the implementation the chance
-    if (0 == gThreadCount)
+    // Figure out how many threads to run -- check first for non-zero to give the implementation the chance
+    if( 0 == gThreadCount )
-#if defined(_MSC_VER) || defined(__MINGW64__)
+#if defined(_MSC_VER) || defined (__MINGW64__)
         DWORD length = 0;
-        GetLogicalProcessorInformation(NULL, &length);
-        buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(length);
-        if (buffer != NULL)
+        GetLogicalProcessorInformation( NULL, &length );
+        buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length );
+        if( buffer != NULL )
-            if (GetLogicalProcessorInformation(buffer, &length) == TRUE)
+            if ( GetLogicalProcessorInformation( buffer, &length ) == TRUE )
                 PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
-                while (
-                    ptr
-                    < &buffer[length
-                              / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)])
+                while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] )
-                    if (ptr->Relationship == RelationProcessorCore)
+                    if( ptr->Relationship == RelationProcessorCore )
-                        // Count the number of bits in ProcessorMask (number of
-                        // logical cores)
+                        // Count the number of bits in ProcessorMask (number of logical cores)
                         ULONG mask = ptr->ProcessorMask;
-                        while (mask)
+                        while( mask )
                             mask &= mask - 1; // Remove 1 bit at a time
@@ -535,71 +482,66 @@
-#elif defined(__MINGW32__)
+#elif defined (__MINGW32__)
-#warning How about this, instead of hard coding it to 2?
+            #warning  How about this, instead of hard coding it to 2?
             SYSTEM_INFO sysinfo;
-            GetSystemInfo(&sysinfo);
+            GetSystemInfo( &sysinfo );
             gThreadCount = sysinfo.dwNumberOfProcessors;
-#elif defined(__linux__) && !defined(__ANDROID__)
-        cpu_set_t affinity;
-        if (0 == sched_getaffinity(0, sizeof(cpu_set_t), &affinity))
+#elif defined (__linux__) && !defined(__ANDROID__)
+        cpu_set_t    affinity;
+        if ( 0 == sched_getaffinity(0, sizeof(cpu_set_t), &affinity) )
 #if !(defined(CPU_COUNT))
-            gThreadCount = 1;
+        gThreadCount = 1;
             gThreadCount = CPU_COUNT(&affinity);
-            // Hopefully your system returns logical cpus here, as does MacOS X
-            gThreadCount = (cl_int)sysconf(_SC_NPROCESSORS_CONF);
+            gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF);       // Hopefully your system returns logical cpus here, as does MacOS X
-#else /* !_WIN32 */
-        // Hopefully your system returns logical cpus here, as does MacOS X
-        gThreadCount = (cl_int)sysconf(_SC_NPROCESSORS_CONF);
+#else // !_WIN32
+        gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF);       // Hopefully your system returns logical cpus here, as does MacOS X
 #endif // !_WIN32
-        // Multithreaded tests are required to run multithreaded even on unicore
-        // systems so as to test thread safety
-        if (1 == gThreadCount) gThreadCount = 2;
+        // Multithreaded tests are required to run multithreaded even on unicore systems so as to test thread safety
+        if( 1 == gThreadCount )
+            gThreadCount = 2;
-// When working in 32 bit limit the thread number to 12
-// This fix was made due to memory issues in integer_ops test
-// When running integer_ops, the test opens as many threads as the
-// machine has and each thread allocates a fixed amount of memory
-// When running this test on dual socket machine in 32-bit, the
-// process memory is not sufficient and the test fails
-#if defined(_WIN32) && !defined(_M_X64)
-    if (gThreadCount > 12)
-    {
-        gThreadCount = 12;
-    }
+    // When working in 32 bit limit the thread number to 12
+    // This fix was made due to memory issues in integer_ops test
+    // When running integer_ops, the test opens as many threads as the
+    // machine has and each thread allocates a fixed amount of memory
+    // When running this test on dual socket machine in 32-bit, the
+    // process memory is not sufficient and the test fails
+    #if defined(_WIN32) && !defined(_M_X64)
+        if (gThreadCount > 12) {
+            gThreadCount = 12;
+        }
+    #endif
-    // Allow the app to set thread count to <0 for debugging purposes.
-    // This will cause the test to run single threaded.
-    if (gThreadCount < 2)
+    // Allow the app to set thread count to < 2 for debugging purposes.  This will cause the test to run single threaded.
+    if( gThreadCount < 2 )
-        log_error("ERROR: Running single threaded because thread count < 2. "
-                  "\n*** TEST IS INVALID! ***\n");
+        log_error( "ERROR: Running single threaded because thread count < 2. \n*** TEST IS INVALID! ***\n");
         gThreadCount = 1;
-#if defined(_WIN32)
-    InitializeCriticalSection(gThreadPoolLock);
-    InitializeCriticalSection(cond_lock);
-    _InitializeConditionVariable(cond_var);
-    caller_event = CreateEvent(NULL, FALSE, FALSE, NULL);
-#elif defined(__GNUC__)
-    // Dont rely on PTHREAD_MUTEX_INITIALIZER for intialization of a mutex since
-    // it might cause problem with some flavors of gcc compilers.
+#if defined( _WIN32 )
+    InitializeCriticalSection( gThreadPoolLock );
+    InitializeCriticalSection( cond_lock );
+    _InitializeConditionVariable( cond_var );
+    caller_event = CreateEvent( NULL, FALSE, FALSE, NULL );
+#elif defined (__GNUC__)
+    // Don't rely on PTHREAD_MUTEX_INITIALIZER for initialization of a mutex since it might cause problems
+    // with some flavors of gcc compilers.
     pthread_cond_init(&cond_var, NULL);
-    pthread_mutex_init(&cond_lock, NULL);
+    pthread_mutex_init(&cond_lock ,NULL);
     pthread_cond_init(&caller_cond_var, NULL);
     pthread_mutex_init(&caller_cond_lock, NULL);
     pthread_mutex_init(&gThreadPoolLock, NULL);
@@ -607,18 +549,15 @@
 #if !(defined(__GNUC__) || defined(_MSC_VER) || defined(__MINGW32__))
-#elif defined(__MINGW32__)
+#elif defined (__MINGW32__)
-    // Make sure the last thread done in the work pool doesn't signal us to wake
-    // before we get to the point where we are supposed to wait
+    // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
     //  That would cause a deadlock.
-#if !defined(_WIN32)
-    if ((err = pthread_mutex_lock(&caller_cond_lock)))
+#if !defined( _WIN32 )
+    if((err = pthread_mutex_lock( &caller_cond_lock) ))
-        log_error("Error %d from pthread_mutex_lock. Unable to block for work "
-                  "to finish. ThreadPool_Init failed.\n",
-                  err);
+        log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
         gThreadCount = 1;
@@ -626,50 +565,45 @@
     gRunning = gThreadCount;
     // init threads
-    for (i = 0; i < gThreadCount; i++)
+    for( i = 0; i < gThreadCount; i++ )
-#if defined(_WIN32)
-        uintptr_t handle =
-            _beginthread(ThreadPool_WorkerFunc, 0, (void *)&threadID);
-        err = (handle == 0);
+#if defined( _WIN32 )
+        uintptr_t handle = _beginthread(ThreadPool_WorkerFunc, 0, (void*) &threadID);
+        err = ( handle == 0 );
 #else // !_WIN32
         pthread_t tid = 0;
-        err = pthread_create(&tid, NULL, ThreadPool_WorkerFunc,
-                             (void *)&threadID);
+        err = pthread_create( &tid, NULL, ThreadPool_WorkerFunc, (void*) &threadID );
 #endif // !_WIN32
-        if (err)
+        if( err )
-            log_error("Error %d launching thread %d\n", err, i);
+            log_error( "Error %d launching thread %d\n", err, i );
             threadPoolInitErr = err;
             gThreadCount = i;
-    atexit(ThreadPool_Exit);
+    atexit( ThreadPool_Exit );
-    // block until they are done launching.
+// block until they are done launching.
-#if defined(_WIN32)
-        WaitForSingleObject(caller_event, INFINITE);
+#if defined( _WIN32 )
+        WaitForSingleObject( caller_event, INFINITE );
 #else // !_WIN32
-        if ((err = pthread_cond_wait(&caller_cond_var, &caller_cond_lock)))
+        if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
-            log_error("Error %d from pthread_cond_wait. Unable to block for "
-                      "work to finish. ThreadPool_Init failed.\n",
-                      err);
-            pthread_mutex_unlock(&caller_cond_lock);
+            log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
+            pthread_mutex_unlock( &caller_cond_lock);
 #endif // !_WIN32
-    } while (gRunCount != -gThreadCount);
-#if !defined(_WIN32)
-    if ((err = pthread_mutex_unlock(&caller_cond_lock)))
+    }
+    while( gRunCount != -gThreadCount );
+#if !defined( _WIN32 )
+    if((err = pthread_mutex_unlock( &caller_cond_lock) ))
-        log_error("Error %d from pthread_mutex_unlock. Unable to block for "
-                  "work to finish. ThreadPool_Init failed.\n",
-                  err);
+        log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
 #endif // !_WIN32
@@ -678,8 +612,7 @@
 #if defined(_MSC_VER)
-static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter,
-                                      PVOID *lpContex)
+static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex)
     return TRUE;
@@ -691,40 +624,35 @@
     int err, count;
     gRunCount = CL_INT_MAX;
-#if defined(__GNUC__)
-    // GCC extension:
-    //
+#if defined( __GNUC__ )
+    // GCC extension:
-#elif defined(_MSC_VER)
+#elif defined( _MSC_VER )
-#warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
+    #warning   If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
     // spin waiting for threads to die
     for (count = 0; 0 != gThreadCount && count < 1000; count++)
-#if defined(_WIN32)
-        _WakeAllConditionVariable(cond_var);
+#if defined( _WIN32 )
+        _WakeAllConditionVariable( cond_var );
 #else // !_WIN32
-        if ((err = pthread_cond_broadcast(&cond_var)))
+        if( (err = pthread_cond_broadcast( &cond_var )))
-            log_error("Error %d from pthread_cond_broadcast. Unable to wake up "
-                      "work threads. ThreadPool_Exit failed.\n",
-                      err);
+            log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Exit failed.\n", err );
 #endif // !_WIN32
-    if (gThreadCount)
-        log_error("Error: Thread pool timed out after 1 second with %d threads "
-                  "still active.\n",
-                  gThreadCount);
+    if( gThreadCount )
+        log_error( "Error: Thread pool timed out after 1 second with %d threads still active.\n", gThreadCount );
-        log_info("Thread pool exited in a orderly fashion.\n");
+        log_info( "Thread pool exited in a orderly fashion.\n" );
@@ -736,123 +664,108 @@
 // can be running at a time. It is not intended for general purpose use.
 // If clEnqueueNativeKernelFn, out of order queues and a CL_DEVICE_TYPE_CPU were
 // all available then it would make more sense to use those features.
-cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo)
+cl_int ThreadPool_Do( TPFuncPtr func_ptr,
+                      cl_uint count,
+                      void *userInfo )
     cl_int newErr;
     cl_int err = 0;
     // Lazily set up our threads
 #if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
-    err = !_InitOnceExecuteOnce(&threadpool_init_control, _ThreadPool_Init,
-                                NULL, NULL);
-#elif defined(_WIN32)
-    if (threadpool_init_control == 0)
-    {
-#warning This is buggy and race prone.  Find a better way.
+    err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
+#elif defined (_WIN32)
+    if (threadpool_init_control == 0) {
+    #warning  This is buggy and race prone.  Find a better way.
         threadpool_init_control = 1;
-#else // posix platform
-    err = pthread_once(&threadpool_init_control, ThreadPool_Init);
-    if (err)
+#else //posix platform
+    err = pthread_once( &threadpool_init_control, ThreadPool_Init );
+    if( err )
-        log_error("Error %d from pthread_once. Unable to init threads. "
-                  "ThreadPool_Do failed.\n",
-                  err);
+        log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
         return err;
-    // Single threaded code to handle case where threadpool wasn't allocated or
-    // was disabled by environment variable
-    if (threadPoolInitErr)
+    // Single threaded code to handle case where threadpool wasn't allocated or was disabled by environment variable
+    if( threadPoolInitErr )
         cl_uint currentJob = 0;
-        cl_int result = CL_SUCCESS;
+        cl_int  result = CL_SUCCESS;
 #if defined(__APPLE__) && defined(__arm__)
         // On most platforms which support denorm, default is FTZ off. However,
-        // on some hardware where the reference is computed, default might be
-        // flush denorms to zero e.g. arm. This creates issues in result
-        // verification. Since spec allows the implementation to either flush or
-        // not flush denorms to zero, an implementation may choose not be flush
-        // i.e. return denorm result whereas reference result may be zero
-        // (flushed denorm). Hence we need to disable denorm flushing on host
-        // side where reference is being computed to make sure we get
-        // non-flushed reference result. If implementation returns flushed
-        // result, we correctly take care of that in verification code.
+        // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
+        // This creates issues in result verification. Since spec allows the implementation to either flush or
+        // not flush denorms to zero, an implementation may choose not to flush i.e. return denorm result whereas
+        // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
+        // where reference is being computed to make sure we get non-flushed reference result. If implementation
+        // returns flushed result, we correctly take care of that in verification code.
         FPU_mode_type oldMode;
-        DisableFTZ(&oldMode);
+        DisableFTZ( &oldMode );
-        for (currentJob = 0; currentJob < count; currentJob++)
-            if ((result = func_ptr(currentJob, 0, userInfo)))
+        for( currentJob = 0; currentJob < count; currentJob++ )
+            if((result = func_ptr( currentJob, 0, userInfo )))
 #if defined(__APPLE__) && defined(__arm__)
                 // Restore FP state before leaving
-                RestoreFPState(&oldMode);
+                RestoreFPState( &oldMode );
                 return result;
 #if defined(__APPLE__) && defined(__arm__)
         // Restore FP state before leaving
-        RestoreFPState(&oldMode);
+        RestoreFPState( &oldMode );
         return CL_SUCCESS;
-    if (count >= MAX_COUNT)
+    if( count >= MAX_COUNT )
-        log_error(
-            "Error: ThreadPool_Do count %d >= max threadpool count of %d\n",
-            count, MAX_COUNT);
+        log_error("Error: ThreadPool_Do count %d >= max threadpool count of %d\n", count, MAX_COUNT );
         return -1;
     // Enter critical region
-#if defined(_WIN32)
-    EnterCriticalSection(gThreadPoolLock);
+#if defined( _WIN32 )
+    EnterCriticalSection( gThreadPoolLock );
 #else // !_WIN32
-    if ((err = pthread_mutex_lock(&gThreadPoolLock)))
+    if( (err = pthread_mutex_lock( &gThreadPoolLock )))
         switch (err)
             case EDEADLK:
-                log_error(
-                    "Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do "
-                    "is not designed to work recursively!\n");
+                log_error("Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do is not designed to work recursively!\n" );
             case EINVAL:
-                log_error("Error EINVAL returned in ThreadPool_Do(). How did "
-                          "we end up with an invalid gThreadPoolLock?\n");
+                log_error("Error EINVAL returned in ThreadPool_Do(). How did we end up with an invalid gThreadPoolLock?\n" );
-            default: break;
+            default:
+                break;
         return err;
 #endif // !_WIN32
     // Start modifying the job state observable by worker threads
-#if defined(_WIN32)
-    EnterCriticalSection(cond_lock);
+#if defined( _WIN32 )
+    EnterCriticalSection( cond_lock );
 #else // !_WIN32
-    if ((err = pthread_mutex_lock(&cond_lock)))
+    if((err = pthread_mutex_lock( &cond_lock) ))
-        log_error("Error %d from pthread_mutex_lock. Unable to wake up work "
-                  "threads. ThreadPool_Do failed.\n",
-                  err);
+        log_error("Error %d from pthread_mutex_lock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
         goto exit;
 #endif // !_WIN32
-    // Make sure the last thread done in the work pool doesn't signal us to wake
-    // before we get to the point where we are supposed to wait
+    // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
     //  That would cause a deadlock.
-#if !defined(_WIN32)
-    if ((err = pthread_mutex_lock(&caller_cond_lock)))
+#if !defined( _WIN32 )
+    if((err = pthread_mutex_lock( &caller_cond_lock) ))
-        log_error("Error %d from pthread_mutex_lock. Unable to block for work "
-                  "to finish. ThreadPool_Do failed.\n",
-                  err);
+        log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
         goto exit;
 #endif // !_WIN32
@@ -863,50 +776,42 @@
     gFunc_ptr = func_ptr;
     gUserInfo = userInfo;
-#if defined(_WIN32)
+#if defined( _WIN32 )
-    _WakeAllConditionVariable(cond_var);
-    LeaveCriticalSection(cond_lock);
+    _WakeAllConditionVariable( cond_var );
+    LeaveCriticalSection( cond_lock );
 #else // !_WIN32
-    if ((err = pthread_cond_broadcast(&cond_var)))
+    if( (err = pthread_cond_broadcast( &cond_var )))
-        log_error("Error %d from pthread_cond_broadcast. Unable to wake up "
-                  "work threads. ThreadPool_Do failed.\n",
-                  err);
+        log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
         goto exit;
-    if ((err = pthread_mutex_unlock(&cond_lock)))
+    if((err = pthread_mutex_unlock( &cond_lock) ))
-        log_error("Error %d from pthread_mutex_unlock. Unable to wake up work "
-                  "threads. ThreadPool_Do failed.\n",
-                  err);
+        log_error("Error %d from pthread_mutex_unlock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
         goto exit;
 #endif // !_WIN32
-    // block until they are done.  It would be slightly more efficient to do
-    // some of the work here though.
+// block until they are done.  It would be slightly more efficient to do some of the work here though.
-#if defined(_WIN32)
-        WaitForSingleObject(caller_event, INFINITE);
+#if defined( _WIN32 )
+        WaitForSingleObject( caller_event, INFINITE );
 #else // !_WIN32
-        if ((err = pthread_cond_wait(&caller_cond_var, &caller_cond_lock)))
+        if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
-            log_error("Error %d from pthread_cond_wait. Unable to block for "
-                      "work to finish. ThreadPool_Do failed.\n",
-                      err);
-            pthread_mutex_unlock(&caller_cond_lock);
+            log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
+            pthread_mutex_unlock( &caller_cond_lock);
             goto exit;
 #endif // !_WIN32
-    } while (gRunning);
+    }
+    while( gRunning );
 #if !defined(_WIN32)
-    if ((err = pthread_mutex_unlock(&caller_cond_lock)))
+    if((err = pthread_mutex_unlock( &caller_cond_lock) ))
-        log_error("Error %d from pthread_mutex_unlock. Unable to block for "
-                  "work to finish. ThreadPool_Do failed.\n",
-                  err);
+        log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
         goto exit;
 #endif // !_WIN32
@@ -915,15 +820,13 @@
     // exit critical region
-#if defined(_WIN32)
-    LeaveCriticalSection(gThreadPoolLock);
+#if defined( _WIN32 )
+    LeaveCriticalSection( gThreadPoolLock );
 #else // !_WIN32
-    newErr = pthread_mutex_unlock(&gThreadPoolLock);
-    if (newErr)
+    newErr = pthread_mutex_unlock( &gThreadPoolLock );
+    if( newErr)
-        log_error("Error %d from pthread_mutex_unlock. Unable to exit critical "
-                  "region. ThreadPool_Do failed.\n",
-                  newErr);
+        log_error("Error %d from pthread_mutex_unlock. Unable to exit critical region. ThreadPool_Do failed.\n", newErr );
         return err;
 #endif // !_WIN32
@@ -931,31 +834,28 @@
     return err;
-cl_uint GetThreadCount(void)
+cl_uint GetThreadCount( void )
     // Lazily set up our threads
 #if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
-    cl_int err = !_InitOnceExecuteOnce(&threadpool_init_control,
-                                       _ThreadPool_Init, NULL, NULL);
-#elif defined(_WIN32)
-    if (threadpool_init_control == 0)
-    {
-#warning This is buggy and race prone.  Find a better way.
+    cl_int err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
+#elif defined (_WIN32)
+    if (threadpool_init_control == 0) {
+    #warning  This is buggy and race prone.  Find a better way.
         threadpool_init_control = 1;
-    cl_int err = pthread_once(&threadpool_init_control, ThreadPool_Init);
-    if (err)
+    cl_int err = pthread_once( &threadpool_init_control, ThreadPool_Init );
+    if( err )
-        log_error("Error %d from pthread_once. Unable to init threads. "
-                  "ThreadPool_Do failed.\n",
-                  err);
+        log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
         return err;
 #endif // !_WIN32
-    if (gThreadCount < 1) return 1;
+    if( gThreadCount < 1 )
+        return 1;
     return gThreadCount;
@@ -963,26 +863,24 @@
-#error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section.
+    #error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section.
-// We require multithreading in parts of the test as a means of simultaneously
-// testing reentrancy requirements of OpenCL API, while also checking
+// We require multithreading in parts of the test as a means of simultaneously testing reentrancy requirements
+// of OpenCL API, while also checking
-// A sample single threaded implementation follows, for documentation /
-// bootstrapping purposes. It is not okay to use this for conformance testing!!!
+// A sample single threaded implementation follows, for documentation / bootstrapping purposes.
+// It is not okay to use this for conformance testing!!!
-// Exception:  If your operating system does not support multithreaded execution
-// of any kind, then you may use this code.
+// Exception:  If your operating system does not support multithreaded execution of any kind, then you may use this code.
-cl_int ThreadPool_AtomicAdd(volatile cl_int *a, cl_int b)
+cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
     cl_uint r = *a;
-    // since this fallback code path is not multithreaded, we just do a regular
-    // add here. If your operating system supports memory-barrier-atomics, use
-    // those here.
+    // since this fallback code path is not multithreaded, we just do a regular add here
+    // If your operating system supports memory-barrier-atomics, use those here
     *a = r + b;
     return r;
@@ -991,38 +889,44 @@
 // Blocking API that farms out count jobs to a thread pool.
 // It may return with some work undone if func_ptr() returns a non-zero
 // result.
-cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo)
+cl_int ThreadPool_Do(   TPFuncPtr func_ptr,
+                        cl_uint count,
+                        void *userInfo )
     cl_uint currentJob = 0;
-    cl_int result = CL_SUCCESS;
+    cl_int  result = CL_SUCCESS;
-    log_error("ERROR:  Test must be multithreaded!\n");
+    log_error( "ERROR:  Test must be multithreaded!\n" );
     static int spewCount = 0;
-    if (0 == spewCount)
+    if( 0 == spewCount )
-        log_info("\nWARNING:  The operating system is claimed not to support "
-                 "threads of any sort. Running single threaded.\n");
+        log_info( "\nWARNING:  The operating system is claimed not to support threads of any sort. Running single threaded.\n" );
         spewCount = 1;
-    // The multithreaded code should mimic this behavior:
-    for (currentJob = 0; currentJob < count; currentJob++)
-        if ((result = func_ptr(currentJob, 0, userInfo))) return result;
+// The multithreaded code should mimic this behavior:
+    for( currentJob = 0; currentJob < count; currentJob++ )
+        if((result = func_ptr( currentJob, 0, userInfo )))
+            return result;
     return CL_SUCCESS;
-cl_uint GetThreadCount(void) { return 1; }
-void SetThreadCount(int count)
+cl_uint GetThreadCount( void )
-    if (count > 1) log_info("WARNING: SetThreadCount(%d) ignored\n", count);
+    return 1;
+void SetThreadCount( int count )
+    if( count > 1 )
+        log_info( "WARNING: SetThreadCount(%d) ignored\n", count );
diff --git a/test_common/harness/ThreadPool.h b/test_common/harness/ThreadPool.h
index 2ef07b4..2fa9c7b 100644
--- a/test_common/harness/ThreadPool.h
+++ b/test_common/harness/ThreadPool.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,54 +16,53 @@
 #ifndef THREAD_POOL_H
 #define THREAD_POOL_H
-#if defined(__APPLE__)
-#include <OpenCL/opencl.h>
+#if defined( __APPLE__ )
+    #include <OpenCL/opencl.h>
-#include <CL/cl.h>
+    #include <CL/cl.h>
 // An atomic add operator
-cl_int ThreadPool_AtomicAdd(volatile cl_int *a, cl_int b); // returns old value
+cl_int     ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b );    // returns old value
 // Your function prototype
-// A function pointer to the function you want to execute in a multithreaded
-// context.  No synchronization primitives are provided, other than the atomic
-// add above. You may not call ThreadPool_Do from your function.
-// ThreadPool_AtomicAdd() and GetThreadCount() should work, however.
+// A function pointer to the function you want to execute in a multithreaded context.  No
+// synchronization primitives are provided, other than the atomic add above. You may not
+// call ThreadPool_Do from your function. ThreadPool_AtomicAdd() and GetThreadCount() should
+// work, however.
-// job ids and thread ids are 0 based.  If number of jobs or threads was 8, they
-// will numbered be 0 through 7. Note that while every job will be run, it is
-// not guaranteed that every thread will wake up before the work is done.
-typedef cl_int (*TPFuncPtr)(cl_uint /*job_id*/, cl_uint /* thread_id */,
-                            void *userInfo);
+// job ids and thread ids are 0 based.  If number of jobs or threads was 8, they will be numbered 0 through 7.
+// Note that while every job will be run, it is not guaranteed that every thread will wake up before
+// the work is done.
+typedef cl_int (*TPFuncPtr)( cl_uint /*job_id*/, cl_uint /* thread_id */, void *userInfo );
 // returns first non-zero result from func_ptr, or CL_SUCCESS if all are zero.
 // Some workitems may not run if a non-zero result is returned from func_ptr().
 // This function may not be called from a TPFuncPtr.
-cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo);
+cl_int      ThreadPool_Do(  TPFuncPtr func_ptr,
+                            cl_uint count,
+                            void *userInfo );
-// Returns the number of worker threads that underlie the threadpool.  The value
-// passed as the TPFuncPtrs thread_id will be between 0 and this value less one,
-// inclusive. This is safe to call from a TPFuncPtr.
-cl_uint GetThreadCount(void);
+// Returns the number of worker threads that underlie the threadpool.  The value passed
+// as the TPFuncPtrs thread_id will be between 0 and this value less one, inclusive.
+// This is safe to call from a TPFuncPtr.
+cl_uint     GetThreadCount( void );
 // SetThreadCount() may be used to artifically set the number of worker threads
-// If the value is 0 (the default) the number of threads will be determined
-// based on the number of CPU cores.  If it is a unicore machine, then 2 will be
-// used, so that we still get some testing for thread safety.
+// If the value is 0 (the default) the number of threads will be determined based on
+// the number of CPU cores.  If it is a unicore machine, then 2 will be used, so
+// that we still get some testing for thread safety.
-// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then
-// the code will run single threaded, but will report an error to indicate that
-// the test is invalid.  This option is intended for debugging purposes only. It
-// is suggested as a convention that test apps set the thread count to 1 in
-// response to the -m flag.
+// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
+// code will run single threaded, but will report an error to indicate that the test
+// is invalid.  This option is intended for debugging purposes only. It is suggested
+// as a convention that test apps set the thread count to 1 in response to the -m flag.
-// SetThreadCount() must be called before the first call to GetThreadCount() or
-// ThreadPool_Do(), otherwise the behavior is indefined. It may not be called
-// from a TPFuncPtr.
-void SetThreadCount(int count);
+// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
+// otherwise the behavior is undefined. It may not be called from a TPFuncPtr.
+void        SetThreadCount( int count );
-#endif /* THREAD_POOL_H  */
+#endif  /* THREAD_POOL_H  */
diff --git a/test_common/harness/alloc.h b/test_common/harness/alloc.h
index 653dde0..33e6bd8 100644
--- a/test_common/harness/alloc.h
+++ b/test_common/harness/alloc.h
@@ -17,7 +17,7 @@
-#if defined(__linux__) || defined(linux) || defined(__APPLE__)
+#if defined(__linux__) || defined (linux) || defined(__APPLE__)
 #if defined(__ANDROID__)
 #include <malloc.h>
@@ -29,41 +29,43 @@
 #include "mingw_compat.h"
-static void* align_malloc(size_t size, size_t alignment)
+static void * align_malloc(size_t size, size_t alignment)
 #if defined(_WIN32) && defined(_MSC_VER)
     return _aligned_malloc(size, alignment);
-#elif defined(__linux__) || defined(linux) || defined(__APPLE__)
-    void* ptr = NULL;
+#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
+    void * ptr = NULL;
 #if defined(__ANDROID__)
     ptr = memalign(alignment, size);
-    if (ptr) return ptr;
+    if ( ptr )
+        return ptr;
-    if (alignment < sizeof(void*))
-    {
+    if (alignment < sizeof(void*)) {
         alignment = sizeof(void*);
-    if (0 == posix_memalign(&ptr, alignment, size)) return ptr;
+    if (0 == posix_memalign(&ptr, alignment, size))
+        return ptr;
     return NULL;
 #elif defined(__MINGW32__)
     return __mingw_aligned_malloc(size, alignment);
-#error "Please add support OS for aligned malloc"
+    #error "Please add support OS for aligned malloc"
-static void align_free(void* ptr)
+static void align_free(void * ptr)
 #if defined(_WIN32) && defined(_MSC_VER)
-#elif defined(__linux__) || defined(linux) || defined(__APPLE__)
-    return free(ptr);
+#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
+    return  free(ptr);
 #elif defined(__MINGW32__)
     return __mingw_aligned_free(ptr);
-#error "Please add support OS for aligned free"
+    #error "Please add support OS for aligned free"
 #endif // #ifndef HARNESS_ALLOC_H_
diff --git a/test_common/harness/clImageHelper.h b/test_common/harness/clImageHelper.h
index 3019ff3..dfa05ca 100644
--- a/test_common/harness/clImageHelper.h
+++ b/test_common/harness/clImageHelper.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -26,254 +26,257 @@
 #include "errorHelpers.h"
-// helper function to replace clCreateImage2D , to make the existing code use
-// the functions of version 1.2 and veriosn 1.1  respectively
+  // helper function to replace clCreateImage2D , to make the existing code use
+  // the functions of version 1.2 and version 1.1  respectively
-static inline cl_mem create_image_2d(cl_context context, cl_mem_flags flags,
-                                     const cl_image_format *image_format,
-                                     size_t image_width, size_t image_height,
-                                     size_t image_row_pitch, void *host_ptr,
-                                     cl_int *errcode_ret)
+  static inline cl_mem create_image_2d  (cl_context context,
+                           cl_mem_flags flags,
+                           const cl_image_format *image_format,
+                           size_t image_width,
+                           size_t image_height,
+                           size_t image_row_pitch,
+                           void *host_ptr,
+                           cl_int *errcode_ret)
+  {
     cl_mem mImage = NULL;
-    if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)))
-    {
-        host_ptr = NULL;
-    }
 #ifdef CL_VERSION_1_2
     cl_image_desc image_desc_dest;
-    image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;
+    image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;;
     image_desc_dest.image_width = image_width;
     image_desc_dest.image_height = image_height;
-    image_desc_dest.image_depth = 0; // not usedfor 2d
-    image_desc_dest.image_array_size = 0; // not used for 2d
+    image_desc_dest.image_depth= 0;// not usedfor 2d
+    image_desc_dest.image_array_size = 0;// not used for 2d
     image_desc_dest.image_row_pitch = image_row_pitch;
     image_desc_dest.image_slice_pitch = 0;
     image_desc_dest.num_mip_levels = 0;
     image_desc_dest.num_samples = 0;
-    image_desc_dest.mem_object =
-        NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in
-              // CL_VERSION_1_1, so always is NULL
-    mImage = clCreateImage(context, flags, image_format, &image_desc_dest,
-                           host_ptr, errcode_ret);
-    if (errcode_ret && (*errcode_ret))
-    {
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage failed (%d)\n", *errcode_ret);
+    image_desc_dest.mem_object = NULL;// no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
+    mImage = clCreateImage( context, flags, image_format, &image_desc_dest, host_ptr, errcode_ret );
+    if (errcode_ret && (*errcode_ret)) {
+      // Log an info message and rely on the calling function to produce an error
+      // if necessary.
+      log_info("clCreateImage failed (%d)\n", *errcode_ret);
-    mImage =
-        clCreateImage2D(context, flags, image_format, image_width, image_height,
-                        image_row_pitch, host_ptr, errcode_ret);
-    if (errcode_ret && (*errcode_ret))
-    {
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage2D failed (%d)\n", *errcode_ret);
+    mImage = clCreateImage2D( context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret );
+    if (errcode_ret && (*errcode_ret)) {
+      // Log an info message and rely on the calling function to produce an error
+      // if necessary.
+      log_info("clCreateImage2D failed (%d)\n", *errcode_ret);
     return mImage;
+  }
-// helper function to replace clCreateImage2D , to make the existing code use
-// the functions of version 1.2 and veriosn 1.1  respectively
+    // helper function to replace clCreateImage2D , to make the existing code use
+    // the functions of version 1.2 and version 1.1  respectively
-static inline cl_mem
-create_image_2d_buffer(cl_context context, cl_mem_flags flags,
-                       const cl_image_format *image_format, size_t image_width,
-                       size_t image_height, size_t image_row_pitch,
-                       cl_mem buffer, cl_int *errcode_ret)
-    cl_mem mImage = NULL;
-    cl_image_desc image_desc_dest;
-    image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;
-    image_desc_dest.image_width = image_width;
-    image_desc_dest.image_height = image_height;
-    image_desc_dest.image_depth = 0; // not usedfor 2d
-    image_desc_dest.image_array_size = 0; // not used for 2d
-    image_desc_dest.image_row_pitch = image_row_pitch;
-    image_desc_dest.image_slice_pitch = 0;
-    image_desc_dest.num_mip_levels = 0;
-    image_desc_dest.num_samples = 0;
-    image_desc_dest.mem_object = buffer;
-    mImage = clCreateImage(context, flags, image_format, &image_desc_dest, NULL,
-                           errcode_ret);
-    if (errcode_ret && (*errcode_ret))
+    static inline cl_mem create_image_2d_buffer  (cl_context context,
+                                    cl_mem_flags flags,
+                                    const cl_image_format *image_format,
+                                    size_t image_width,
+                                    size_t image_height,
+                                    size_t image_row_pitch,
+                                    cl_mem buffer,
+                                    cl_int *errcode_ret)
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        cl_mem mImage = NULL;
+        cl_image_desc image_desc_dest;
+        image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;;
+        image_desc_dest.image_width = image_width;
+        image_desc_dest.image_height = image_height;
+        image_desc_dest.image_depth= 0;// not usedfor 2d
+        image_desc_dest.image_array_size = 0;// not used for 2d
+        image_desc_dest.image_row_pitch = image_row_pitch;
+        image_desc_dest.image_slice_pitch = 0;
+        image_desc_dest.num_mip_levels = 0;
+        image_desc_dest.num_samples = 0;
+        image_desc_dest.mem_object = buffer;
+        mImage = clCreateImage( context, flags, image_format, &image_desc_dest, NULL, errcode_ret );
+        if (errcode_ret && (*errcode_ret)) {
+            // Log an info message and rely on the calling function to produce an error
+            // if necessary.
+            log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        }
+        return mImage;
-    return mImage;
-static inline cl_mem create_image_3d(cl_context context, cl_mem_flags flags,
-                                     const cl_image_format *image_format,
-                                     size_t image_width, size_t image_height,
-                                     size_t image_depth, size_t image_row_pitch,
-                                     size_t image_slice_pitch, void *host_ptr,
-                                     cl_int *errcode_ret)
+  static inline cl_mem create_image_3d (cl_context context,
+                          cl_mem_flags flags,
+                          const cl_image_format *image_format,
+                          size_t image_width,
+                          size_t image_height,
+                          size_t image_depth,
+                          size_t image_row_pitch,
+                          size_t image_slice_pitch,
+                          void *host_ptr,
+                          cl_int *errcode_ret)
+  {
     cl_mem mImage;
-    if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)))
-    {
-        host_ptr = NULL;
-    }
 #ifdef CL_VERSION_1_2
     cl_image_desc image_desc;
     image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
     image_desc.image_width = image_width;
     image_desc.image_height = image_height;
     image_desc.image_depth = image_depth;
-    image_desc.image_array_size = 0; // not used for one image
+    image_desc.image_array_size = 0;// not used for one image
     image_desc.image_row_pitch = image_row_pitch;
     image_desc.image_slice_pitch = image_slice_pitch;
     image_desc.num_mip_levels = 0;
     image_desc.num_samples = 0;
-    image_desc.mem_object =
-        NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in
-              // CL_VERSION_1_1, so always is NULL
-    mImage = clCreateImage(context, flags, image_format, &image_desc, host_ptr,
-                           errcode_ret);
-    if (errcode_ret && (*errcode_ret))
-    {
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage failed (%d)\n", *errcode_ret);
+    image_desc.mem_object = NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
+    mImage = clCreateImage( context,
+                           flags,
+                           image_format,
+                           &image_desc,
+                           host_ptr,
+                           errcode_ret );
+    if (errcode_ret && (*errcode_ret)) {
+      // Log an info message and rely on the calling function to produce an error
+      // if necessary.
+      log_info("clCreateImage failed (%d)\n", *errcode_ret);
-    mImage = clCreateImage3D(context, flags, image_format, image_width,
-                             image_height, image_depth, image_row_pitch,
-                             image_slice_pitch, host_ptr, errcode_ret);
-    if (errcode_ret && (*errcode_ret))
-    {
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage3D failed (%d)\n", *errcode_ret);
+    mImage = clCreateImage3D( context,
+                             flags, image_format,
+                             image_width,
+                             image_height,
+                             image_depth,
+                             image_row_pitch,
+                             image_slice_pitch,
+                             host_ptr,
+                             errcode_ret );
+    if (errcode_ret && (*errcode_ret)) {
+      // Log an info message and rely on the calling function to produce an error
+      // if necessary.
+      log_info("clCreateImage3D failed (%d)\n", *errcode_ret);
     return mImage;
+  }
-static inline cl_mem
-create_image_2d_array(cl_context context, cl_mem_flags flags,
-                      const cl_image_format *image_format, size_t image_width,
-                      size_t image_height, size_t image_array_size,
-                      size_t image_row_pitch, size_t image_slice_pitch,
-                      void *host_ptr, cl_int *errcode_ret)
-    cl_mem mImage;
-    if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)))
+    static inline cl_mem create_image_2d_array (cl_context context,
+                                   cl_mem_flags flags,
+                                   const cl_image_format *image_format,
+                                   size_t image_width,
+                                   size_t image_height,
+                                   size_t image_array_size,
+                                   size_t image_row_pitch,
+                                   size_t image_slice_pitch,
+                                   void *host_ptr,
+                                   cl_int *errcode_ret)
-        host_ptr = NULL;
+        cl_mem mImage;
+        cl_image_desc image_desc;
+        image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+        image_desc.image_width = image_width;
+        image_desc.image_height = image_height;
+        image_desc.image_depth = 1;
+        image_desc.image_array_size = image_array_size;
+        image_desc.image_row_pitch = image_row_pitch;
+        image_desc.image_slice_pitch = image_slice_pitch;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.mem_object = NULL;
+        mImage = clCreateImage( context,
+                               flags,
+                               image_format,
+                               &image_desc,
+                               host_ptr,
+                               errcode_ret );
+        if (errcode_ret && (*errcode_ret)) {
+            // Log an info message and rely on the calling function to produce an error
+            // if necessary.
+            log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        }
+        return mImage;
-    cl_image_desc image_desc;
-    image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
-    image_desc.image_width = image_width;
-    image_desc.image_height = image_height;
-    image_desc.image_depth = 1;
-    image_desc.image_array_size = image_array_size;
-    image_desc.image_row_pitch = image_row_pitch;
-    image_desc.image_slice_pitch = image_slice_pitch;
-    image_desc.num_mip_levels = 0;
-    image_desc.num_samples = 0;
-    image_desc.mem_object = NULL;
-    mImage = clCreateImage(context, flags, image_format, &image_desc, host_ptr,
-                           errcode_ret);
-    if (errcode_ret && (*errcode_ret))
+    static inline cl_mem create_image_1d_array (cl_context context,
+                                         cl_mem_flags flags,
+                                         const cl_image_format *image_format,
+                                         size_t image_width,
+                                         size_t image_array_size,
+                                         size_t image_row_pitch,
+                                         size_t image_slice_pitch,
+                                         void *host_ptr,
+                                         cl_int *errcode_ret)
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        cl_mem mImage;
+        cl_image_desc image_desc;
+        image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
+        image_desc.image_width = image_width;
+        image_desc.image_height = 1;
+        image_desc.image_depth = 1;
+        image_desc.image_array_size = image_array_size;
+        image_desc.image_row_pitch = image_row_pitch;
+        image_desc.image_slice_pitch = image_slice_pitch;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.mem_object = NULL;
+        mImage = clCreateImage( context,
+                               flags,
+                               image_format,
+                               &image_desc,
+                               host_ptr,
+                               errcode_ret );
+        if (errcode_ret && (*errcode_ret)) {
+            // Log an info message and rely on the calling function to produce an error
+            // if necessary.
+            log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        }
+        return mImage;
-    return mImage;
-static inline cl_mem create_image_1d_array(
-    cl_context context, cl_mem_flags flags, const cl_image_format *image_format,
-    size_t image_width, size_t image_array_size, size_t image_row_pitch,
-    size_t image_slice_pitch, void *host_ptr, cl_int *errcode_ret)
-    cl_mem mImage;
-    if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)))
+    static inline cl_mem create_image_1d (cl_context context,
+                                   cl_mem_flags flags,
+                                   const cl_image_format *image_format,
+                                   size_t image_width,
+                                   size_t image_row_pitch,
+                                   void *host_ptr,
+                                   cl_mem buffer,
+                                   cl_int *errcode_ret)
-        host_ptr = NULL;
+        cl_mem mImage;
+        cl_image_desc image_desc;
+        image_desc.image_type = buffer ? CL_MEM_OBJECT_IMAGE1D_BUFFER: CL_MEM_OBJECT_IMAGE1D;
+        image_desc.image_width = image_width;
+        image_desc.image_height = 1;
+        image_desc.image_depth = 1;
+        image_desc.image_row_pitch = image_row_pitch;
+        image_desc.image_slice_pitch = 0;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.mem_object = buffer;
+        mImage = clCreateImage( context,
+                               flags,
+                               image_format,
+                               &image_desc,
+                               host_ptr,
+                               errcode_ret );
+        if (errcode_ret && (*errcode_ret)) {
+            // Log an info message and rely on the calling function to produce an error
+            // if necessary.
+            log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        }
+        return mImage;
-    cl_image_desc image_desc;
-    image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
-    image_desc.image_width = image_width;
-    image_desc.image_height = 1;
-    image_desc.image_depth = 1;
-    image_desc.image_array_size = image_array_size;
-    image_desc.image_row_pitch = image_row_pitch;
-    image_desc.image_slice_pitch = image_slice_pitch;
-    image_desc.num_mip_levels = 0;
-    image_desc.num_samples = 0;
-    image_desc.mem_object = NULL;
-    mImage = clCreateImage(context, flags, image_format, &image_desc, host_ptr,
-                           errcode_ret);
-    if (errcode_ret && (*errcode_ret))
-    {
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage failed (%d)\n", *errcode_ret);
-    }
-    return mImage;
-static inline cl_mem create_image_1d(cl_context context, cl_mem_flags flags,
-                                     const cl_image_format *image_format,
-                                     size_t image_width, size_t image_row_pitch,
-                                     void *host_ptr, cl_mem buffer,
-                                     cl_int *errcode_ret)
-    cl_mem mImage;
-    if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)))
-    {
-        host_ptr = NULL;
-    }
-    cl_image_desc image_desc;
-    image_desc.image_type =
-    image_desc.image_width = image_width;
-    image_desc.image_height = 1;
-    image_desc.image_depth = 1;
-    image_desc.image_row_pitch = image_row_pitch;
-    image_desc.image_slice_pitch = 0;
-    image_desc.num_mip_levels = 0;
-    image_desc.num_samples = 0;
-    image_desc.mem_object = buffer;
-    mImage = clCreateImage(context, flags, image_format, &image_desc, host_ptr,
-                           errcode_ret);
-    if (errcode_ret && (*errcode_ret))
-    {
-        // Log an info message and rely on the calling function to produce an
-        // error if necessary.
-        log_info("clCreateImage failed (%d)\n", *errcode_ret);
-    }
-    return mImage;
diff --git a/test_common/harness/cl_offline_compiler-interface.txt b/test_common/harness/cl_offline_compiler-interface.txt
index fd6997d..30ab182 100644
--- a/test_common/harness/cl_offline_compiler-interface.txt
+++ b/test_common/harness/cl_offline_compiler-interface.txt
@@ -23,5 +23,3 @@
    CL_DEVICE_EXTENSIONS="<space separated list of CL extensions>"
    CL_DEVICE_IL_VERSION="<space separated list of IL versions>"
    CL_DEVICE_VERSION="OpenCL <version> <vendor info>"
-   CL_DEVICE_NAME="device name"
diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h
index 7aad15a..3858a7c 100644
--- a/test_common/harness/compat.h
+++ b/test_common/harness/compat.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,14 +16,14 @@
 #ifndef _COMPAT_H_
 #define _COMPAT_H_
-#if defined(_WIN32) && defined(_MSC_VER)
+#if defined(_WIN32) && defined (_MSC_VER)
 #include <Windows.h>
 #ifdef __cplusplus
-#define EXTERN_C extern "C"
+    #define EXTERN_C extern "C"
-#define EXTERN_C
+    #define EXTERN_C
@@ -31,11 +31,11 @@
 // stdlib.h
-#include <stdlib.h> // On Windows, _MAX_PATH defined there.
+#include <stdlib.h>     // On Windows, _MAX_PATH defined there.
 // llabs appeared in MS C v16 (VS 10/2010).
-#if defined(_MSC_VER) && _MSC_VER <= 1500
-EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; }
+#if defined( _MSC_VER ) && _MSC_VER <= 1500
+    EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; }
@@ -44,15 +44,16 @@
 // stdbool.h appeared in MS C v18 (VS 12/2013).
-#if defined(_MSC_VER) && MSC_VER <= 1700
+#if defined( _MSC_VER ) && MSC_VER <= 1700
 #if !defined(__cplusplus)
 typedef char bool;
-#define true 1
-#define false 0
+        #define true  1
+        #define false 0
+    #endif
-#include <stdbool.h>
-#endif // defined(_MSC_VER) && MSC_VER <= 1700
+    #include <stdbool.h>
@@ -60,25 +61,24 @@
 // stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12.
-#if defined(_MSC_VER)                                                          \
-    && (!defined(__INTEL_COMPILER) && _MSC_VER <= 1500                         \
-        || defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1200)
-typedef unsigned char uint8_t;
-typedef char int8_t;
-typedef unsigned short uint16_t;
-typedef short int16_t;
-typedef unsigned int uint32_t;
-typedef int int32_t;
-typedef unsigned long long uint64_t;
-typedef long long int64_t;
+#if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 )
+typedef unsigned char       uint8_t;
+typedef char                int8_t;
+typedef unsigned short      uint16_t;
+typedef short               int16_t;
+typedef unsigned int        uint32_t;
+typedef int                 int32_t;
+typedef unsigned long long  uint64_t;
+typedef long long           int64_t;
-#include <stdint.h>
+    #include <stdint.h>
 // float.h
@@ -86,23 +86,24 @@
 #include <float.h>
 // fenv.h
 // fenv.h appeared in MS C v18 (VS 12/2013).
-#if defined(_MSC_VER) && _MSC_VER <= 1700 && !defined(__INTEL_COMPILER)
-// reimplement fenv.h because windows doesn't have it
-#define FE_INEXACT 0x0020
-#define FE_UNDERFLOW 0x0010
-#define FE_OVERFLOW 0x0008
-#define FE_DIVBYZERO 0x0004
-#define FE_INVALID 0x0001
-#define FE_ALL_EXCEPT 0x003D
-int fetestexcept(int excepts);
-int feclearexcept(int excepts);
+#if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER )
+    // reimplement fenv.h because windows doesn't have it
+    #define FE_INEXACT          0x0020
+    #define FE_UNDERFLOW        0x0010
+    #define FE_OVERFLOW         0x0008
+    #define FE_DIVBYZERO        0x0004
+    #define FE_INVALID          0x0001
+    #define FE_ALL_EXCEPT       0x003D
+    int fetestexcept(int excepts);
+    int feclearexcept(int excepts);
-#include <fenv.h>
+    #include <fenv.h>
@@ -110,137 +111,138 @@
 // math.h
-#if defined(__INTEL_COMPILER)
-#include <mathimf.h>
+#if defined( __INTEL_COMPILER )
+    #include <mathimf.h>
-#include <math.h>
+    #include <math.h>
 #ifndef M_PI
-#define M_PI 3.14159265358979323846264338327950288
+    #define M_PI 3.14159265358979323846264338327950288
-#if defined(_MSC_VER)
+#if defined( _MSC_VER )
-#ifdef __cplusplus
-extern "C" {
+    #ifdef __cplusplus
+        extern "C" {
+    #endif
+    #ifndef NAN
+        #define NAN  (INFINITY - INFINITY)
+    #endif
+    #ifndef HUGE_VALF
+        #define HUGE_VALF (float)HUGE_VAL
+    #endif
+    #ifndef INFINITY
+        #define INFINITY    (FLT_MAX + FLT_MAX)
+    #endif
+    #ifndef isfinite
+        #define isfinite(x) _finite(x)
+    #endif
+    #ifndef isnan
+        #define    isnan( x )       ((x) != (x))
+    #endif
+    #ifndef isinf
+        #define     isinf( _x)      ((_x) == INFINITY || (_x) == -INFINITY)
+    #endif
+    #if _MSC_VER < 1900 && ! defined( __INTEL_COMPILER )
+        double rint( double x);
+        float  rintf( float x);
+        long double rintl( long double x);
+        float cbrtf( float );
+        double cbrt( double );
+        int    ilogb( double x);
+        int    ilogbf (float x);
+        int    ilogbl(long double x);
+        double fmax(double x, double y);
+        double fmin(double x, double y);
+        float  fmaxf( float x, float y );
+        float  fminf(float x, float y);
+        double      log2(double x);
+        long double log2l(long double x);
+        double      exp2(double x);
+        long double exp2l(long double x);
+        double      fdim(double x, double y);
+        float       fdimf(float x, float y);
+        long double fdiml(long double x, long double y);
+        double      remquo( double x, double y, int *quo);
+        float       remquof( float x, float y, int *quo);
+        long double remquol( long double x, long double y, int *quo);
+        long double scalblnl(long double x, long n);
+        float hypotf(float x, float y);
+        long double hypotl(long double x, long double y) ;
+        double lgamma(double x);
+        float  lgammaf(float x);
+        double trunc(double x);
+        float  truncf(float x);
+        double log1p(double x);
+        float  log1pf(float x);
+        long double log1pl(long double x);
+        double copysign(double x, double y);
+        float  copysignf(float x, float y);
+        long double copysignl(long double x, long double y);
+        long lround(double x);
+        long lroundf(float x);
+        //long lroundl(long double x)
+        double round(double x);
+        float  roundf(float x);
+        long double roundl(long double x);
+        int cf_signbit(double x);
+        int cf_signbitf(float x);
+        // Added in _MSC_VER == 1800 (Visual Studio 2013)
+        #if _MSC_VER < 1800
+                static int signbit(double x) { return  cf_signbit(x); }
+        #endif
+        static int signbitf(float x) { return cf_signbitf(x); }
+        long int lrint (double flt);
+        long int lrintf (float flt);
+        float   int2float (int32_t ix);
+        int32_t float2int (float   fx);
+    #endif // _MSC_VER < 1900 && ! defined( __INTEL_COMPILER )
+    #if _MSC_VER < 1900 && ( ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300 )
+        // These functions appeared in Intel C v13 and Visual Studio 2015
+        float  nanf( const char* str);
+        double nan( const char* str);
+        long double nanl( const char* str);
+    #endif
+    #ifdef __cplusplus
+        }
+    #endif
-#ifndef NAN
+#if defined( __ANDROID__ )
+    #define log2(X)  (log(X)/log(2))
-#ifndef HUGE_VALF
-#define HUGE_VALF (float)HUGE_VAL
-#ifndef INFINITY
-#ifndef isfinite
-#define isfinite(x) _finite(x)
-#ifndef isnan
-#define isnan(x) ((x) != (x))
-#ifndef isinf
-#define isinf(_x) ((_x) == INFINITY || (_x) == -INFINITY)
-#if _MSC_VER < 1900 && !defined(__INTEL_COMPILER)
-double rint(double x);
-float rintf(float x);
-long double rintl(long double x);
-float cbrtf(float);
-double cbrt(double);
-int ilogb(double x);
-int ilogbf(float x);
-int ilogbl(long double x);
-double fmax(double x, double y);
-double fmin(double x, double y);
-float fmaxf(float x, float y);
-float fminf(float x, float y);
-double log2(double x);
-long double log2l(long double x);
-double exp2(double x);
-long double exp2l(long double x);
-double fdim(double x, double y);
-float fdimf(float x, float y);
-long double fdiml(long double x, long double y);
-double remquo(double x, double y, int* quo);
-float remquof(float x, float y, int* quo);
-long double remquol(long double x, long double y, int* quo);
-long double scalblnl(long double x, long n);
-float hypotf(float x, float y);
-long double hypotl(long double x, long double y);
-double lgamma(double x);
-float lgammaf(float x);
-double trunc(double x);
-float truncf(float x);
-double log1p(double x);
-float log1pf(float x);
-long double log1pl(long double x);
-double copysign(double x, double y);
-float copysignf(float x, float y);
-long double copysignl(long double x, long double y);
-long lround(double x);
-long lroundf(float x);
-// long lroundl(long double x)
-double round(double x);
-float roundf(float x);
-long double roundl(long double x);
-int cf_signbit(double x);
-int cf_signbitf(float x);
-// Added in _MSC_VER == 1800 (Visual Studio 2013)
-#if _MSC_VER < 1800
-static int signbit(double x) { return cf_signbit(x); }
-static int signbitf(float x) { return cf_signbitf(x); }
-long int lrint(double flt);
-long int lrintf(float flt);
-float int2float(int32_t ix);
-int32_t float2int(float fx);
-#endif // _MSC_VER < 1900 && ! defined( __INTEL_COMPILER )
-#if _MSC_VER < 1900 && (!defined(__INTEL_COMPILER) || __INTEL_COMPILER < 1300)
-// These functions appeared in Intel C v13 and Visual Studio 2015
-float nanf(const char* str);
-double nan(const char* str);
-long double nanl(const char* str);
-#ifdef __cplusplus
-#endif // defined(_MSC_VER)
-#if defined(__ANDROID__)
-#define log2(X) (log(X) / log(2))
@@ -248,11 +250,12 @@
 #if defined(_MSC_VER)
-// snprintf added in _MSC_VER == 1900 (Visual Studio 2015)
-#if _MSC_VER < 1900
-#define snprintf sprintf_s
+	// snprintf added in _MSC_VER == 1900 (Visual Studio 2015)
+	#if _MSC_VER < 1900
+		#define snprintf   sprintf_s
+	#endif
-#endif // defined(_MSC_VER)
@@ -260,32 +263,35 @@
 #if defined(_MSC_VER)
-#define strtok_r strtok_s
+    #define strtok_r strtok_s
 // unistd.h
-#if defined(_MSC_VER)
-EXTERN_C unsigned int sleep(unsigned int sec);
-EXTERN_C int usleep(int usec);
+#if defined( _MSC_VER )
+    EXTERN_C unsigned int sleep( unsigned int sec );
+    EXTERN_C int usleep( int usec );
 // syscall.h
-#if defined(__ANDROID__)
-// Android bionic's isn't providing SYS_sysctl wrappers.
-#define SYS__sysctl __NR__sysctl
+#if defined( __ANDROID__ )
+    // Android's bionic does not provide SYS_sysctl wrappers.
+    #define SYS__sysctl  __NR__sysctl
 // Some tests use _malloca which defined in malloc.h.
-#if !defined(__APPLE__)
+#if !defined (__APPLE__)
 #include <malloc.h>
@@ -294,115 +300,104 @@
 // ???
-#if defined(_MSC_VER)
+#if defined( _MSC_VER )
-EXTERN_C uint64_t ReadTime(void);
-EXTERN_C double SubtractTime(uint64_t endTime, uint64_t startTime);
+    EXTERN_C uint64_t ReadTime( void );
+    EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime );
 /** Returns the number of leading 0-bits in x,
     starting at the most significant bit position.
     If x is 0, the result is undefined.
-EXTERN_C int __builtin_clz(unsigned int pattern);
+    EXTERN_C int __builtin_clz(unsigned int pattern);
 #ifndef MIN
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+    #define MIN(x,y) (((x)<(y))?(x):(y))
 #ifndef MAX
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+    #define MAX(x,y) (((x)>(y))?(x):(y))
+    ------------------------------------------------------------------------------------------------
-   This is a typical usage of the macros:
+    This is a typical usage of the macros:
-     double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2);
+        double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2);
-   (taken from math_brute_force/reference_math.c). There are two problems:
+     (taken from math_brute_force/reference_math.c). There are two problems:
-     1. There is an error here. On Windows in will produce incorrect result
-        `0x1.5555555555555p+50'.
-        To have a correct result it should be written as:
-           MAKE_HEX_DOUBLE(0x1.5555555555555p-2, 0x15555555555555LL, -54)
-        A proper value of the third argument is not obvious -- sometimes it
-        should be the same as exponent of the first argument, but sometimes
-        not.
+        1.  There is an error here. On Windows it will produce an incorrect result
+            `0x1.5555555555555p+50'. To have a correct result it should be written as
+            `MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the
+            third argument is not obvious -- sometimes it should be the same as exponent of the
+            first argument, but sometimes not.
-     2. Information is duplicated. It is easy to make a mistake.
+        2.  Information is duplicated. It is easy to make a mistake.
-   Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead
-   (see them in the bottom of the file).
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+    Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file).
+    ------------------------------------------------------------------------------------------------
+#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
-#define MAKE_HEX_FLOAT(x, y, z) ((float)ldexp((float)(y), z))
-#define MAKE_HEX_DOUBLE(x, y, z) ldexp((double)(y), z)
-#define MAKE_HEX_LONG(x, y, z) ((long double)ldexp((long double)(y), z))
+    #define MAKE_HEX_FLOAT(x,y,z)  ((float)ldexp( (float)(y), z))
+    #define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
+    #define MAKE_HEX_LONG(x,y,z)   ((long double) ldexp( (long double)(y), z))
 // Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead.
-#define MAKE_HEX_FLOAT(x, y, z) x
-#define MAKE_HEX_DOUBLE(x, y, z) x
-#define MAKE_HEX_LONG(x, y, z) x
+#define MAKE_HEX_FLOAT(x,y,z) x
+#define MAKE_HEX_DOUBLE(x,y,z) x
+#define MAKE_HEX_LONG(x,y,z) x
-   HEX_FLT, HEXT_DBL, HEX_LDBL -- Create hex floating point literal of type
-   float, double, long double respectively. Arguments:
+    ------------------------------------------------------------------------------------------------
+    HEX_FLT, HEX_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long
+    double respectively. Arguments:
-      sm    -- sign of number,
-      int   -- integer part of mantissa (without `0x' prefix),
-      fract -- fractional part of mantissa (without decimal point and `L' or
-            `LL' suffixes),
-      se    -- sign of exponent,
-      exp   -- absolute value of (binary) exponent.
+        sm    -- sign of number,
+        int   -- integer part of mantissa (without `0x' prefix),
+        fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes),
+        se    -- sign of exponent,
+        exp   -- absolute value of (binary) exponent.
-   Example:
+    Example:
-      double yhi = HEX_DBL(+, 1, 5555555555555, -, 2); // 0x1.5555555555555p-2
+        double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2
-   Note:
+    Note:
-      We have to pass signs as separate arguments because gcc pass negative
-   integer values (e. g. `-2') into a macro as two separate tokens, so
-   `HEX_FLT(1, 0, -2)' produces result `0x1.0p- 2' (note a space between minus
-   and two) which is not a correct floating point literal.
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
-// If compiler does not support hex floating point literals:
-#define HEX_FLT(sm, int, fract, se, exp)                                       \
-    sm ldexpf((float)(0x##int##fract##UL),                                     \
-              se exp + ilogbf((float)0x##int)                                  \
-                  - ilogbf((float)(0x##int##fract##UL)))
-#define HEX_DBL(sm, int, fract, se, exp)                                       \
-    sm ldexp((double)(0x##int##fract##ULL),                                    \
-             se exp + ilogb((double)0x##int)                                   \
-                 - ilogb((double)(0x##int##fract##ULL)))
-#define HEX_LDBL(sm, int, fract, se, exp)                                      \
-    sm ldexpl((long double)(0x##int##fract##ULL),                              \
-              se exp + ilogbl((long double)0x##int)                            \
-                  - ilogbl((long double)(0x##int##fract##ULL)))
+        We have to pass signs as separate arguments because gcc passes negative integer values
+        (e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result
+        `0x1.0p- 2' (note a space between minus and two) which is not a correct floating point
+        literal.
+    ------------------------------------------------------------------------------------------------
+#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
+    // If compiler does not support hex floating point literals:
+    #define HEX_FLT(  sm, int, fract, se, exp ) sm ldexpf(       (float)( 0x ## int ## fract ## UL  ), se exp + ilogbf(       (float) 0x ## int ) - ilogbf(       ( float )( 0x ## int ## fract ## UL  ) ) )
+    #define HEX_DBL(  sm, int, fract, se, exp ) sm ldexp(       (double)( 0x ## int ## fract ## ULL ), se exp + ilogb(       (double) 0x ## int ) - ilogb(       ( double )( 0x ## int ## fract ## ULL ) ) )
+    #define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) )
-// If compiler supports hex floating point literals: just concatenate all the
-// parts into a literal.
-#define HEX_FLT(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp##F
-#define HEX_DBL(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp
-#define HEX_LDBL(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp##L
+    // If compiler supports hex floating point literals: just concatenate all the parts into a literal.
+    #define HEX_FLT(  sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F
+    #define HEX_DBL(  sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp
+    #define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L
 #if defined(__MINGW32__)
-#include <Windows.h>
-#define sleep(sec) Sleep((sec)*1000)
+    #include <Windows.h>
+    #define sleep(sec) Sleep((sec) * 1000)
 #endif // _COMPAT_H_
diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp
index fc3317c..72fd8cb 100644
--- a/test_common/harness/conversions.cpp
+++ b/test_common/harness/conversions.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -20,101 +20,123 @@
 #include "mt19937.h"
 #include "compat.h"
-#if defined(__SSE__) || defined(_MSC_VER)
-#include <xmmintrin.h>
+#if defined( __SSE__ ) || defined (_MSC_VER)
+    #include <xmmintrin.h>
-#if defined(__SSE2__) || defined(_MSC_VER)
-#include <emmintrin.h>
+#if defined( __SSE2__ ) || defined (_MSC_VER)
+    #include <emmintrin.h>
-void print_type_to_string(ExplicitType type, void *data, char *string)
-    switch (type)
-    {
-        case kBool:
-            if (*(char *)data)
-                sprintf(string, "true");
-            else
-                sprintf(string, "false");
+void print_type_to_string(ExplicitType type, void *data, char* string) {
+     switch (type) {
+       case kBool:
+      if (*(char*)data)
+        sprintf(string, "true");
+      else
+        sprintf(string, "false");
-        case kChar: sprintf(string, "%d", (int)*((cl_char *)data)); return;
-        case kUChar:
-        case kUnsignedChar:
-            sprintf(string, "%u", (int)*((cl_uchar *)data));
-            return;
-        case kShort: sprintf(string, "%d", (int)*((cl_short *)data)); return;
-        case kUShort:
-        case kUnsignedShort:
-            sprintf(string, "%u", (int)*((cl_ushort *)data));
-            return;
-        case kInt: sprintf(string, "%d", *((cl_int *)data)); return;
-        case kUInt:
-        case kUnsignedInt: sprintf(string, "%u", *((cl_uint *)data)); return;
-        case kLong: sprintf(string, "%lld", *((cl_long *)data)); return;
-        case kULong:
-        case kUnsignedLong:
-            sprintf(string, "%llu", *((cl_ulong *)data));
-            return;
-        case kFloat: sprintf(string, "%f", *((cl_float *)data)); return;
-        case kHalf: sprintf(string, "half"); return;
-        case kDouble: sprintf(string, "%g", *((cl_double *)data)); return;
-        default: sprintf(string, "INVALID"); return;
-    }
+    case kChar:
+      sprintf(string, "%d", (int)*((cl_char*)data));
+      return;
+    case kUChar:
+    case kUnsignedChar:
+      sprintf(string, "%u", (int)*((cl_uchar*)data));
+      return;
+    case kShort:
+      sprintf(string, "%d", (int)*((cl_short*)data));
+      return;
+    case kUShort:
+    case kUnsignedShort:
+      sprintf(string, "%u", (int)*((cl_ushort*)data));
+      return;
+    case kInt:
+      sprintf(string, "%d", *((cl_int*)data));
+      return;
+    case kUInt:
+    case kUnsignedInt:
+      sprintf(string, "%u", *((cl_uint*)data));
+      return;
+    case kLong:
+      sprintf(string, "%lld", *((cl_long*)data));
+      return;
+    case kULong:
+    case kUnsignedLong:
+      sprintf(string, "%llu", *((cl_ulong*)data));
+      return;
+    case kFloat:
+      sprintf(string, "%f", *((cl_float*)data));
+      return;
+    case kHalf:
+      sprintf(string, "half");
+      return;
+    case kDouble:
+      sprintf(string, "%g", *((cl_double*)data));
+      return;
+    default:
+      sprintf(string, "INVALID");
+      return;
+  }
-size_t get_explicit_type_size(ExplicitType type)
+size_t get_explicit_type_size( ExplicitType type )
-    /* Quick method to avoid branching: make sure the following array matches
-     * the Enum order */
-    static size_t sExplicitTypeSizes[] = {
-        sizeof(cl_bool),   sizeof(cl_char),  sizeof(cl_uchar),
-        sizeof(cl_uchar),  sizeof(cl_short), sizeof(cl_ushort),
-        sizeof(cl_ushort), sizeof(cl_int),   sizeof(cl_uint),
-        sizeof(cl_uint),   sizeof(cl_long),  sizeof(cl_ulong),
-        sizeof(cl_ulong),  sizeof(cl_float), sizeof(cl_half),
-        sizeof(cl_double)
-    };
+    /* Quick method to avoid branching: make sure the following array matches the Enum order */
+    static size_t    sExplicitTypeSizes[] = {
+            sizeof( cl_bool ),
+            sizeof( cl_char ),
+            sizeof( cl_uchar ),
+            sizeof( cl_uchar ),
+            sizeof( cl_short ),
+            sizeof( cl_ushort ),
+            sizeof( cl_ushort ),
+            sizeof( cl_int ),
+            sizeof( cl_uint ),
+            sizeof( cl_uint ),
+            sizeof( cl_long ),
+            sizeof( cl_ulong ),
+            sizeof( cl_ulong ),
+            sizeof( cl_float ),
+            sizeof( cl_half ),
+            sizeof( cl_double )
+        };
-    return sExplicitTypeSizes[type];
+    return sExplicitTypeSizes[ type ];
-const char *get_explicit_type_name(ExplicitType type)
+const char * get_explicit_type_name( ExplicitType type )
-    /* Quick method to avoid branching: make sure the following array matches
-     * the Enum order */
-    static const char *sExplicitTypeNames[] = {
-        "bool",           "char",  "uchar", "unsigned char", "short", "ushort",
-        "unsigned short", "int",   "uint",  "unsigned int",  "long",  "ulong",
-        "unsigned long",  "float", "half",  "double"
-    };
+    /* Quick method to avoid branching: make sure the following array matches the Enum order */
+    static const char *sExplicitTypeNames[] = { "bool", "char", "uchar", "unsigned char", "short", "ushort", "unsigned short", "int",
+                            "uint", "unsigned int", "long", "ulong", "unsigned long", "float", "half", "double" };
-    return sExplicitTypeNames[type];
+    return sExplicitTypeNames[ type ];
-static long lrintf_clamped(float f);
-static long lrintf_clamped(float f)
+static long lrintf_clamped( float f );
+static long lrintf_clamped( float f )
-    static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23),
-                                    -MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23) };
+    static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) };
-    if (f >= -(float)LONG_MIN) return LONG_MAX;
+    if( f >= -(float) LONG_MIN )
+        return LONG_MAX;
-    if (f <= (float)LONG_MIN) return LONG_MIN;
+    if( f <= (float) LONG_MIN )
+        return LONG_MIN;
     // Round fractional values to integer in round towards nearest mode
-    if (fabsf(f) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23))
+    if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) )
         volatile float x = f;
-        float magicVal = magic[f < 0];
+        float magicVal = magic[ f < 0 ];
-#if defined(__SSE__) || defined(_WIN32)
-        // Defeat x87 based arithmetic, which cant do FTZ, and will round this
-        // incorrectly
-        __m128 v = _mm_set_ss(x);
-        __m128 m = _mm_set_ss(magicVal);
-        v = _mm_add_ss(v, m);
-        v = _mm_sub_ss(v, m);
-        _mm_store_ss((float *)&x, v);
+#if defined( __SSE__ ) || defined (_WIN32)
+        // Defeat x87 based arithmetic, which can't do FTZ, and will round this incorrectly
+        __m128 v = _mm_set_ss( x );
+        __m128 m = _mm_set_ss( magicVal );
+        v = _mm_add_ss( v, m );
+        v = _mm_sub_ss( v, m );
+        _mm_store_ss( (float*) &x, v );
         x += magicVal;
         x -= magicVal;
@@ -122,39 +144,40 @@
         f = x;
-    return (long)f;
+    return (long) f;
-static long lrint_clamped(double f);
-static long lrint_clamped(double f)
+static long lrint_clamped( double f );
+static long lrint_clamped( double f )
-    static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52),
-                                     MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
+    static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
-    if (sizeof(long) > 4)
+    if( sizeof( long ) > 4 )
-        if (f >= -(double)LONG_MIN) return LONG_MAX;
+        if( f >= -(double) LONG_MIN )
+            return LONG_MAX;
-        if (f >= LONG_MAX) return LONG_MAX;
+        if( f >= LONG_MAX )
+            return LONG_MAX;
-    if (f <= (double)LONG_MIN) return LONG_MIN;
+    if( f <= (double) LONG_MIN )
+        return LONG_MIN;
     // Round fractional values to integer in round towards nearest mode
-    if (fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52))
+    if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52) )
         volatile double x = f;
-        double magicVal = magic[f < 0];
-#if defined(__SSE2__) || (defined(_MSC_VER))
-        // Defeat x87 based arithmetic, which cant do FTZ, and will round this
-        // incorrectly
-        __m128d v = _mm_set_sd(x);
-        __m128d m = _mm_set_sd(magicVal);
-        v = _mm_add_sd(v, m);
-        v = _mm_sub_sd(v, m);
-        _mm_store_sd((double *)&x, v);
+        double magicVal = magic[ f < 0 ];
+#if defined( __SSE2__ ) || (defined (_MSC_VER))
+        // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly
+        __m128d v = _mm_set_sd( x );
+        __m128d m = _mm_set_sd( magicVal );
+        v = _mm_add_sd( v, m );
+        v = _mm_sub_sd( v, m );
+        _mm_store_sd( (double*) &x, v );
         x += magicVal;
         x -= magicVal;
@@ -162,236 +185,195 @@
         f = x;
-    return (long)f;
+    return (long) f;
 typedef cl_long Long;
 typedef cl_ulong ULong;
-static ULong sUpperLimits[kNumExplicitTypes] = {
-    0,
-    127,
-    255,
-    255,
-    32767,
-    65535,
-    65535,
-    0x7fffffffLL,
-    0xffffffffLL,
-    0xffffffffLL,
-    0x7fffffffffffffffLL,
-    0xffffffffffffffffLL,
-    0xffffffffffffffffLL,
-    0,
-    0
-}; // Last two values aren't stored here
+static ULong sUpperLimits[ kNumExplicitTypes ] =
+    {
+        0,
+        127, 255, 255,
+        32767, 65535, 65535,
+        0x7fffffffLL, 0xffffffffLL, 0xffffffffLL,
+        0x7fffffffffffffffLL, 0xffffffffffffffffLL, 0xffffffffffffffffLL,
+        0, 0 };    // Last two values aren't stored here
-static Long sLowerLimits[kNumExplicitTypes] = {
-    -1,
-    -128,
-    0,
-    0,
-    -32768,
-    0,
-    0,
-    (Long)0xffffffff80000000LL,
-    0,
-    0,
-    (Long)0x8000000000000000LL,
-    0,
-    0,
-    0,
-    0
-}; // Last two values aren't stored here
+static Long sLowerLimits[ kNumExplicitTypes ] =
+    {
+        -1,
+        -128, 0, 0,
+        -32768, 0, 0,
+        0xffffffff80000000LL, 0, 0,
+        0x8000000000000000LL, 0, 0,
+        0, 0 };    // Last two values aren't stored here
-#define BOOL_CASE(inType)                                                      \
-    case kBool:                                                                \
-        boolPtr = (bool *)outRaw;                                              \
-        *boolPtr = (*inType##Ptr) != 0 ? true : false;                         \
+#define BOOL_CASE(inType) \
+        case kBool:    \
+            boolPtr = (bool *)outRaw; \
+            *boolPtr = ( *inType##Ptr ) != 0 ? true : false; \
+            break;
+#define SIMPLE_CAST_CASE(inType,outEnum,outType) \
+        case outEnum:                                \
+            outType##Ptr = (outType *)outRaw;        \
+            *outType##Ptr = (outType)(*inType##Ptr);    \
+            break;
+// Sadly, the ULong downcasting cases need a separate #define to get rid of signed/unsigned comparison warnings
+#define DOWN_CAST_CASE(inType,outEnum,outType,sat) \
+        case outEnum:                                \
+            outType##Ptr = (outType *)outRaw;        \
+            if( sat )                                \
+            {                                        \
+                if( ( sLowerLimits[outEnum] < 0 && *inType##Ptr > (Long)sUpperLimits[outEnum] ) || ( sLowerLimits[outEnum] == 0 && (ULong)*inType##Ptr > sUpperLimits[outEnum] ) )\
+                    *outType##Ptr = (outType)sUpperLimits[outEnum];\
+                else if( *inType##Ptr < sLowerLimits[outEnum] )\
+                    *outType##Ptr = (outType)sLowerLimits[outEnum]; \
+                else                                            \
+                    *outType##Ptr = (outType)*inType##Ptr;    \
+            } else {                                \
+                *outType##Ptr = (outType)( *inType##Ptr & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \
+            }                                        \
+            break;
+#define U_DOWN_CAST_CASE(inType,outEnum,outType,sat) \
+        case outEnum:                                \
+            outType##Ptr = (outType *)outRaw;        \
+            if( sat )                                \
+            {                                        \
+                if( (ULong)*inType##Ptr > sUpperLimits[outEnum] )\
+                    *outType##Ptr = (outType)sUpperLimits[outEnum];\
+                else                                            \
+                    *outType##Ptr = (outType)*inType##Ptr;    \
+            } else {                                \
+                *outType##Ptr = (outType)( *inType##Ptr & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \
+            }                                        \
+            break;
+#define TO_FLOAT_CASE(inType)                \
+        case kFloat:                        \
+            floatPtr = (float *)outRaw;        \
+            *floatPtr = (float)(*inType##Ptr);    \
+            break;
+#define TO_DOUBLE_CASE(inType)                \
+        case kDouble:                        \
+            doublePtr = (double *)outRaw;        \
+            *doublePtr = (double)(*inType##Ptr);    \
+            break;
+/* Note: we use lrintf here to force the rounding instead of whatever the processor's current rounding mode is */
+#define FLOAT_ROUND_TO_NEAREST_CASE(outEnum,outType)    \
+        case outEnum:                                    \
+            outType##Ptr = (outType *)outRaw;            \
+            *outType##Ptr = (outType)lrintf_clamped( *floatPtr );    \
+            break;
+#define FLOAT_ROUND_CASE(outEnum,outType,rounding,sat)    \
+        case outEnum:                                    \
+        {                                                \
+            outType##Ptr = (outType *)outRaw;            \
+            /* Get the tens digit */                    \
+            Long wholeValue = (Long)*floatPtr;\
+            float largeRemainder = ( *floatPtr - (float)wholeValue ) * 10.f; \
+            /* What do we do based on that? */                \
+            if( rounding == kRoundToEven )                    \
+            {                                                \
+                if( wholeValue & 1LL )    /*between 1 and 1.99 */    \
+                    wholeValue += 1LL;    /* round up to even */  \
+            }                                                \
+            else if( rounding == kRoundToZero )                \
+            {                                                \
+                /* Nothing to do, round-to-zero is what C casting does */                            \
+            }                                                \
+            else if( rounding == kRoundToPosInf )            \
+            {                                                \
+                /* Only positive numbers are wrong */        \
+                if( largeRemainder != 0.f && wholeValue >= 0 )    \
+                    wholeValue++;                            \
+            }                                                \
+            else if( rounding == kRoundToNegInf )            \
+            {                                                \
+                /* Only negative numbers are off */            \
+                if( largeRemainder != 0.f && wholeValue < 0 ) \
+                    wholeValue--;                            \
+            }                                                \
+            else                                            \
+            {   /* Default is round-to-nearest */            \
+                wholeValue = (Long)lrintf_clamped( *floatPtr );    \
+            }                                                \
+            /* Now apply saturation rules */                \
+            if( sat )                                \
+            {                                        \
+                if( ( sLowerLimits[outEnum] < 0 && wholeValue > (Long)sUpperLimits[outEnum] ) || ( sLowerLimits[outEnum] == 0 && (ULong)wholeValue > sUpperLimits[outEnum] ) )\
+                    *outType##Ptr = (outType)sUpperLimits[outEnum];\
+                else if( wholeValue < sLowerLimits[outEnum] )\
+                    *outType##Ptr = (outType)sLowerLimits[outEnum]; \
+                else                                            \
+                    *outType##Ptr = (outType)wholeValue;    \
+            } else {                                \
+                *outType##Ptr = (outType)( wholeValue & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \
+            }                                        \
+        }                \
-#define SIMPLE_CAST_CASE(inType, outEnum, outType)                             \
-    case outEnum:                                                              \
-        outType##Ptr = (outType *)outRaw;                                      \
-        *outType##Ptr = (outType)(*inType##Ptr);                               \
+#define DOUBLE_ROUND_CASE(outEnum,outType,rounding,sat)    \
+        case outEnum:                                    \
+        {                                                \
+            outType##Ptr = (outType *)outRaw;            \
+            /* Get the tens digit */                    \
+            Long wholeValue = (Long)*doublePtr;\
+            double largeRemainder = ( *doublePtr - (double)wholeValue ) * 10.0; \
+            /* What do we do based on that? */                \
+            if( rounding == kRoundToEven )                    \
+            {                                                \
+                if( wholeValue & 1LL )    /*between 1 and 1.99 */    \
+                    wholeValue += 1LL;    /* round up to even */  \
+            }                                                \
+            else if( rounding == kRoundToZero )                \
+            {                                                \
+                /* Nothing to do, round-to-zero is what C casting does */                            \
+            }                                                \
+            else if( rounding == kRoundToPosInf )            \
+            {                                                \
+                /* Only positive numbers are wrong */        \
+                if( largeRemainder != 0.0 && wholeValue >= 0 )    \
+                    wholeValue++;                            \
+            }                                                \
+            else if( rounding == kRoundToNegInf )            \
+            {                                                \
+                /* Only negative numbers are off */            \
+                if( largeRemainder != 0.0 && wholeValue < 0 ) \
+                    wholeValue--;                            \
+            }                                                \
+            else                                            \
+            {   /* Default is round-to-nearest */            \
+                wholeValue = (Long)lrint_clamped( *doublePtr );    \
+            }                                                \
+            /* Now apply saturation rules */                \
+            if( sat )                                \
+            {                                        \
+                if( ( sLowerLimits[outEnum] < 0 && wholeValue > (Long)sUpperLimits[outEnum] ) || ( sLowerLimits[outEnum] == 0 && (ULong)wholeValue > sUpperLimits[outEnum] ) )\
+                    *outType##Ptr = (outType)sUpperLimits[outEnum];\
+                else if( wholeValue < sLowerLimits[outEnum] )\
+                    *outType##Ptr = (outType)sLowerLimits[outEnum]; \
+                else                                            \
+                    *outType##Ptr = (outType)wholeValue;    \
+            } else {                                \
+                *outType##Ptr = (outType)( wholeValue & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \
+            }                                        \
+        }                \
-// Sadly, the ULong downcasting cases need a separate #define to get rid of
-// signed/unsigned comparison warnings
-#define DOWN_CAST_CASE(inType, outEnum, outType, sat)                          \
-    case outEnum:                                                              \
-        outType##Ptr = (outType *)outRaw;                                      \
-        if (sat)                                                               \
-        {                                                                      \
-            if ((sLowerLimits[outEnum] < 0                                     \
-                 && *inType##Ptr > (Long)sUpperLimits[outEnum])                \
-                || (sLowerLimits[outEnum] == 0                                 \
-                    && (ULong)*inType##Ptr > sUpperLimits[outEnum]))           \
-                *outType##Ptr = (outType)sUpperLimits[outEnum];                \
-            else if (*inType##Ptr < sLowerLimits[outEnum])                     \
-                *outType##Ptr = (outType)sLowerLimits[outEnum];                \
-            else                                                               \
-                *outType##Ptr = (outType)*inType##Ptr;                         \
-        }                                                                      \
-        else                                                                   \
-        {                                                                      \
-            *outType##Ptr = (outType)(                                         \
-                *inType##Ptr                                                   \
-                & (0xffffffffffffffffLL >> (64 - (sizeof(outType) * 8))));     \
-        }                                                                      \
-        break;
-#define U_DOWN_CAST_CASE(inType, outEnum, outType, sat)                        \
-    case outEnum:                                                              \
-        outType##Ptr = (outType *)outRaw;                                      \
-        if (sat)                                                               \
-        {                                                                      \
-            if ((ULong)*inType##Ptr > sUpperLimits[outEnum])                   \
-                *outType##Ptr = (outType)sUpperLimits[outEnum];                \
-            else                                                               \
-                *outType##Ptr = (outType)*inType##Ptr;                         \
-        }                                                                      \
-        else                                                                   \
-        {                                                                      \
-            *outType##Ptr = (outType)(                                         \
-                *inType##Ptr                                                   \
-                & (0xffffffffffffffffLL >> (64 - (sizeof(outType) * 8))));     \
-        }                                                                      \
-        break;
-#define TO_FLOAT_CASE(inType)                                                  \
-    case kFloat:                                                               \
-        floatPtr = (float *)outRaw;                                            \
-        *floatPtr = (float)(*inType##Ptr);                                     \
-        break;
-#define TO_DOUBLE_CASE(inType)                                                 \
-    case kDouble:                                                              \
-        doublePtr = (double *)outRaw;                                          \
-        *doublePtr = (double)(*inType##Ptr);                                   \
-        break;
-/* Note: we use lrintf here to force the rounding instead of whatever the
- * processor's current rounding mode is */
-#define FLOAT_ROUND_TO_NEAREST_CASE(outEnum, outType)                          \
-    case outEnum:                                                              \
-        outType##Ptr = (outType *)outRaw;                                      \
-        *outType##Ptr = (outType)lrintf_clamped(*floatPtr);                    \
-        break;
-#define FLOAT_ROUND_CASE(outEnum, outType, rounding, sat)                      \
-    case outEnum: {                                                            \
-        outType##Ptr = (outType *)outRaw;                                      \
-        /* Get the tens digit */                                               \
-        Long wholeValue = (Long)*floatPtr;                                     \
-        float largeRemainder = (*floatPtr - (float)wholeValue) * 10.f;         \
-        /* What do we do based on that? */                                     \
-        if (rounding == kRoundToEven)                                          \
-        {                                                                      \
-            if (wholeValue & 1LL) /*between 1 and 1.99 */                      \
-                wholeValue += 1LL; /* round up to even */                      \
-        }                                                                      \
-        else if (rounding == kRoundToZero)                                     \
-        {                                                                      \
-            /* Nothing to do, round-to-zero is what C casting does */          \
-        }                                                                      \
-        else if (rounding == kRoundToPosInf)                                   \
-        {                                                                      \
-            /* Only positive numbers are wrong */                              \
-            if (largeRemainder != 0.f && wholeValue >= 0) wholeValue++;        \
-        }                                                                      \
-        else if (rounding == kRoundToNegInf)                                   \
-        {                                                                      \
-            /* Only negative numbers are off */                                \
-            if (largeRemainder != 0.f && wholeValue < 0) wholeValue--;         \
-        }                                                                      \
-        else                                                                   \
-        { /* Default is round-to-nearest */                                    \
-            wholeValue = (Long)lrintf_clamped(*floatPtr);                      \
-        }                                                                      \
-        /* Now apply saturation rules */                                       \
-        if (sat)                                                               \
-        {                                                                      \
-            if ((sLowerLimits[outEnum] < 0                                     \
-                 && wholeValue > (Long)sUpperLimits[outEnum])                  \
-                || (sLowerLimits[outEnum] == 0                                 \
-                    && (ULong)wholeValue > sUpperLimits[outEnum]))             \
-                *outType##Ptr = (outType)sUpperLimits[outEnum];                \
-            else if (wholeValue < sLowerLimits[outEnum])                       \
-                *outType##Ptr = (outType)sLowerLimits[outEnum];                \
-            else                                                               \
-                *outType##Ptr = (outType)wholeValue;                           \
-        }                                                                      \
-        else                                                                   \
-        {                                                                      \
-            *outType##Ptr = (outType)(                                         \
-                wholeValue                                                     \
-                & (0xffffffffffffffffLL >> (64 - (sizeof(outType) * 8))));     \
-        }                                                                      \
-    }                                                                          \
-    break;
-#define DOUBLE_ROUND_CASE(outEnum, outType, rounding, sat)                     \
-    case outEnum: {                                                            \
-        outType##Ptr = (outType *)outRaw;                                      \
-        /* Get the tens digit */                                               \
-        Long wholeValue = (Long)*doublePtr;                                    \
-        double largeRemainder = (*doublePtr - (double)wholeValue) * 10.0;      \
-        /* What do we do based on that? */                                     \
-        if (rounding == kRoundToEven)                                          \
-        {                                                                      \
-            if (wholeValue & 1LL) /*between 1 and 1.99 */                      \
-                wholeValue += 1LL; /* round up to even */                      \
-        }                                                                      \
-        else if (rounding == kRoundToZero)                                     \
-        {                                                                      \
-            /* Nothing to do, round-to-zero is what C casting does */          \
-        }                                                                      \
-        else if (rounding == kRoundToPosInf)                                   \
-        {                                                                      \
-            /* Only positive numbers are wrong */                              \
-            if (largeRemainder != 0.0 && wholeValue >= 0) wholeValue++;        \
-        }                                                                      \
-        else if (rounding == kRoundToNegInf)                                   \
-        {                                                                      \
-            /* Only negative numbers are off */                                \
-            if (largeRemainder != 0.0 && wholeValue < 0) wholeValue--;         \
-        }                                                                      \
-        else                                                                   \
-        { /* Default is round-to-nearest */                                    \
-            wholeValue = (Long)lrint_clamped(*doublePtr);                      \
-        }                                                                      \
-        /* Now apply saturation rules */                                       \
-        if (sat)                                                               \
-        {                                                                      \
-            if ((sLowerLimits[outEnum] < 0                                     \
-                 && wholeValue > (Long)sUpperLimits[outEnum])                  \
-                || (sLowerLimits[outEnum] == 0                                 \
-                    && (ULong)wholeValue > sUpperLimits[outEnum]))             \
-                *outType##Ptr = (outType)sUpperLimits[outEnum];                \
-            else if (wholeValue < sLowerLimits[outEnum])                       \
-                *outType##Ptr = (outType)sLowerLimits[outEnum];                \
-            else                                                               \
-                *outType##Ptr = (outType)wholeValue;                           \
-        }                                                                      \
-        else                                                                   \
-        {                                                                      \
-            *outType##Ptr = (outType)(                                         \
-                wholeValue                                                     \
-                & (0xffffffffffffffffLL >> (64 - (sizeof(outType) * 8))));     \
-        }                                                                      \
-    }                                                                          \
-    break;
 typedef unsigned char uchar;
 typedef unsigned short ushort;
 typedef unsigned int uint;
 typedef unsigned long ulong;
-void convert_explicit_value(void *inRaw, void *outRaw, ExplicitType inType,
-                            bool saturate, RoundingType roundType,
-                            ExplicitType outType)
+void convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType )
     bool *boolPtr;
     char *charPtr;
@@ -406,14 +388,14 @@
     double *doublePtr;
-    switch (inType)
+    switch( inType )
         case kBool:
             boolPtr = (bool *)inRaw;
-            switch (outType)
+            switch( outType )
                 case kBool:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
                 case kChar:
@@ -428,482 +410,464 @@
                 case kLong:
                 case kULong:
                 case kUnsignedLong:
-                    memset(outRaw, *boolPtr ? 0xff : 0,
-                           get_explicit_type_size(outType));
+                    memset( outRaw, *boolPtr ? 0xff : 0, get_explicit_type_size( outType ) );
                 case kFloat:
                     floatPtr = (float *)outRaw;
-                    *floatPtr = (*boolPtr) ? -1.f : 0.f;
+                    *floatPtr = ( *boolPtr ) ? -1.f : 0.f;
                 case kDouble:
                     doublePtr = (double *)outRaw;
-                    *doublePtr = (*boolPtr) ? -1.0 : 0.0;
+                    *doublePtr = ( *boolPtr ) ? -1.0 : 0.0;
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
         case kChar:
             charPtr = (char *)inRaw;
-            switch (outType)
+            switch( outType )
                 case kChar:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
-                    DOWN_CAST_CASE(char, kUChar, uchar, saturate)
-                    SIMPLE_CAST_CASE(char, kUnsignedChar, uchar)
-                    SIMPLE_CAST_CASE(char, kShort, short)
-                    SIMPLE_CAST_CASE(char, kUShort, ushort)
-                    SIMPLE_CAST_CASE(char, kUnsignedShort, ushort)
-                    SIMPLE_CAST_CASE(char, kInt, int)
-                    SIMPLE_CAST_CASE(char, kUInt, uint)
-                    SIMPLE_CAST_CASE(char, kUnsignedInt, uint)
-                    SIMPLE_CAST_CASE(char, kLong, Long)
-                    SIMPLE_CAST_CASE(char, kULong, ULong)
-                    SIMPLE_CAST_CASE(char, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(char,kUChar,uchar,saturate)
+                SIMPLE_CAST_CASE(char,kUnsignedChar,uchar)
+                SIMPLE_CAST_CASE(char,kShort,short)
+                SIMPLE_CAST_CASE(char,kUShort,ushort)
+                SIMPLE_CAST_CASE(char,kUnsignedShort,ushort)
+                SIMPLE_CAST_CASE(char,kInt,int)
+                SIMPLE_CAST_CASE(char,kUInt,uint)
+                SIMPLE_CAST_CASE(char,kUnsignedInt,uint)
+                SIMPLE_CAST_CASE(char,kLong,Long)
+                SIMPLE_CAST_CASE(char,kULong,ULong)
+                SIMPLE_CAST_CASE(char,kUnsignedLong,ULong)
-                    TO_FLOAT_CASE(char)
-                    TO_DOUBLE_CASE(char)
+                TO_FLOAT_CASE(char)
+                TO_DOUBLE_CASE(char)
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
         case kUChar:
             ucharPtr = (uchar *)inRaw;
-            switch (outType)
+            switch( outType )
                 case kUChar:
                 case kUnsignedChar:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
-                    DOWN_CAST_CASE(uchar, kChar, char, saturate)
-                    SIMPLE_CAST_CASE(uchar, kShort, short)
-                    SIMPLE_CAST_CASE(uchar, kUShort, ushort)
-                    SIMPLE_CAST_CASE(uchar, kUnsignedShort, ushort)
-                    SIMPLE_CAST_CASE(uchar, kInt, int)
-                    SIMPLE_CAST_CASE(uchar, kUInt, uint)
-                    SIMPLE_CAST_CASE(uchar, kUnsignedInt, uint)
-                    SIMPLE_CAST_CASE(uchar, kLong, Long)
-                    SIMPLE_CAST_CASE(uchar, kULong, ULong)
-                    SIMPLE_CAST_CASE(uchar, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(uchar,kChar,char,saturate)
+                SIMPLE_CAST_CASE(uchar,kShort,short)
+                SIMPLE_CAST_CASE(uchar,kUShort,ushort)
+                SIMPLE_CAST_CASE(uchar,kUnsignedShort,ushort)
+                SIMPLE_CAST_CASE(uchar,kInt,int)
+                SIMPLE_CAST_CASE(uchar,kUInt,uint)
+                SIMPLE_CAST_CASE(uchar,kUnsignedInt,uint)
+                SIMPLE_CAST_CASE(uchar,kLong,Long)
+                SIMPLE_CAST_CASE(uchar,kULong,ULong)
+                SIMPLE_CAST_CASE(uchar,kUnsignedLong,ULong)
-                    TO_FLOAT_CASE(uchar)
-                    TO_DOUBLE_CASE(uchar)
+                TO_FLOAT_CASE(uchar)
+                TO_DOUBLE_CASE(uchar)
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
         case kUnsignedChar:
             ucharPtr = (uchar *)inRaw;
-            switch (outType)
+            switch( outType )
                 case kUChar:
                 case kUnsignedChar:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
-                    DOWN_CAST_CASE(uchar, kChar, char, saturate)
-                    SIMPLE_CAST_CASE(uchar, kShort, short)
-                    SIMPLE_CAST_CASE(uchar, kUShort, ushort)
-                    SIMPLE_CAST_CASE(uchar, kUnsignedShort, ushort)
-                    SIMPLE_CAST_CASE(uchar, kInt, int)
-                    SIMPLE_CAST_CASE(uchar, kUInt, uint)
-                    SIMPLE_CAST_CASE(uchar, kUnsignedInt, uint)
-                    SIMPLE_CAST_CASE(uchar, kLong, Long)
-                    SIMPLE_CAST_CASE(uchar, kULong, ULong)
-                    SIMPLE_CAST_CASE(uchar, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(uchar,kChar,char,saturate)
+                SIMPLE_CAST_CASE(uchar,kShort,short)
+                SIMPLE_CAST_CASE(uchar,kUShort,ushort)
+                SIMPLE_CAST_CASE(uchar,kUnsignedShort,ushort)
+                SIMPLE_CAST_CASE(uchar,kInt,int)
+                SIMPLE_CAST_CASE(uchar,kUInt,uint)
+                SIMPLE_CAST_CASE(uchar,kUnsignedInt,uint)
+                SIMPLE_CAST_CASE(uchar,kLong,Long)
+                SIMPLE_CAST_CASE(uchar,kULong,ULong)
+                SIMPLE_CAST_CASE(uchar,kUnsignedLong,ULong)
-                    TO_FLOAT_CASE(uchar)
-                    TO_DOUBLE_CASE(uchar)
+                TO_FLOAT_CASE(uchar)
+                TO_DOUBLE_CASE(uchar)
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
         case kShort:
             shortPtr = (short *)inRaw;
-            switch (outType)
+            switch( outType )
                 case kShort:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
-                    DOWN_CAST_CASE(short, kChar, char, saturate)
-                    DOWN_CAST_CASE(short, kUChar, uchar, saturate)
-                    DOWN_CAST_CASE(short, kUnsignedChar, uchar, saturate)
-                    DOWN_CAST_CASE(short, kUShort, ushort, saturate)
-                    DOWN_CAST_CASE(short, kUnsignedShort, ushort, saturate)
-                    SIMPLE_CAST_CASE(short, kInt, int)
-                    SIMPLE_CAST_CASE(short, kUInt, uint)
-                    SIMPLE_CAST_CASE(short, kUnsignedInt, uint)
-                    SIMPLE_CAST_CASE(short, kLong, Long)
-                    SIMPLE_CAST_CASE(short, kULong, ULong)
-                    SIMPLE_CAST_CASE(short, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(short,kChar,char,saturate)
+                DOWN_CAST_CASE(short,kUChar,uchar,saturate)
+                DOWN_CAST_CASE(short,kUnsignedChar,uchar,saturate)
+                DOWN_CAST_CASE(short,kUShort,ushort,saturate)
+                DOWN_CAST_CASE(short,kUnsignedShort,ushort,saturate)
+                SIMPLE_CAST_CASE(short,kInt,int)
+                SIMPLE_CAST_CASE(short,kUInt,uint)
+                SIMPLE_CAST_CASE(short,kUnsignedInt,uint)
+                SIMPLE_CAST_CASE(short,kLong,Long)
+                SIMPLE_CAST_CASE(short,kULong,ULong)
+                SIMPLE_CAST_CASE(short,kUnsignedLong,ULong)
-                    TO_FLOAT_CASE(short)
-                    TO_DOUBLE_CASE(short)
+                TO_FLOAT_CASE(short)
+                TO_DOUBLE_CASE(short)
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
         case kUShort:
             ushortPtr = (ushort *)inRaw;
-            switch (outType)
+            switch( outType )
                 case kUShort:
                 case kUnsignedShort:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
-                    DOWN_CAST_CASE(ushort, kChar, char, saturate)
-                    DOWN_CAST_CASE(ushort, kUChar, uchar, saturate)
-                    DOWN_CAST_CASE(ushort, kUnsignedChar, uchar, saturate)
-                    DOWN_CAST_CASE(ushort, kShort, short, saturate)
-                    SIMPLE_CAST_CASE(ushort, kInt, int)
-                    SIMPLE_CAST_CASE(ushort, kUInt, uint)
-                    SIMPLE_CAST_CASE(ushort, kUnsignedInt, uint)
-                    SIMPLE_CAST_CASE(ushort, kLong, Long)
-                    SIMPLE_CAST_CASE(ushort, kULong, ULong)
-                    SIMPLE_CAST_CASE(ushort, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(ushort,kChar,char,saturate)
+                DOWN_CAST_CASE(ushort,kUChar,uchar,saturate)
+                DOWN_CAST_CASE(ushort,kUnsignedChar,uchar,saturate)
+                DOWN_CAST_CASE(ushort,kShort,short,saturate)
+                SIMPLE_CAST_CASE(ushort,kInt,int)
+                SIMPLE_CAST_CASE(ushort,kUInt,uint)
+                SIMPLE_CAST_CASE(ushort,kUnsignedInt,uint)
+                SIMPLE_CAST_CASE(ushort,kLong,Long)
+                SIMPLE_CAST_CASE(ushort,kULong,ULong)
+                SIMPLE_CAST_CASE(ushort,kUnsignedLong,ULong)
-                    TO_FLOAT_CASE(ushort)
-                    TO_DOUBLE_CASE(ushort)
+                TO_FLOAT_CASE(ushort)
+                TO_DOUBLE_CASE(ushort)
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
         case kUnsignedShort:
             ushortPtr = (ushort *)inRaw;
-            switch (outType)
+            switch( outType )
                 case kUShort:
                 case kUnsignedShort:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
-                    DOWN_CAST_CASE(ushort, kChar, char, saturate)
-                    DOWN_CAST_CASE(ushort, kUChar, uchar, saturate)
-                    DOWN_CAST_CASE(ushort, kUnsignedChar, uchar, saturate)
-                    DOWN_CAST_CASE(ushort, kShort, short, saturate)
-                    SIMPLE_CAST_CASE(ushort, kInt, int)
-                    SIMPLE_CAST_CASE(ushort, kUInt, uint)
-                    SIMPLE_CAST_CASE(ushort, kUnsignedInt, uint)
-                    SIMPLE_CAST_CASE(ushort, kLong, Long)
-                    SIMPLE_CAST_CASE(ushort, kULong, ULong)
-                    SIMPLE_CAST_CASE(ushort, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(ushort,kChar,char,saturate)
+                DOWN_CAST_CASE(ushort,kUChar,uchar,saturate)
+                DOWN_CAST_CASE(ushort,kUnsignedChar,uchar,saturate)
+                DOWN_CAST_CASE(ushort,kShort,short,saturate)
+                SIMPLE_CAST_CASE(ushort,kInt,int)
+                SIMPLE_CAST_CASE(ushort,kUInt,uint)
+                SIMPLE_CAST_CASE(ushort,kUnsignedInt,uint)
+                SIMPLE_CAST_CASE(ushort,kLong,Long)
+                SIMPLE_CAST_CASE(ushort,kULong,ULong)
+                SIMPLE_CAST_CASE(ushort,kUnsignedLong,ULong)
-                    TO_FLOAT_CASE(ushort)
-                    TO_DOUBLE_CASE(ushort)
+                TO_FLOAT_CASE(ushort)
+                TO_DOUBLE_CASE(ushort)
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
         case kInt:
             intPtr = (int *)inRaw;
-            switch (outType)
+            switch( outType )
                 case kInt:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
-                    DOWN_CAST_CASE(int, kChar, char, saturate)
-                    DOWN_CAST_CASE(int, kUChar, uchar, saturate)
-                    DOWN_CAST_CASE(int, kUnsignedChar, uchar, saturate)
-                    DOWN_CAST_CASE(int, kShort, short, saturate)
-                    DOWN_CAST_CASE(int, kUShort, ushort, saturate)
-                    DOWN_CAST_CASE(int, kUnsignedShort, ushort, saturate)
-                    DOWN_CAST_CASE(int, kUInt, uint, saturate)
-                    DOWN_CAST_CASE(int, kUnsignedInt, uint, saturate)
-                    SIMPLE_CAST_CASE(int, kLong, Long)
-                    SIMPLE_CAST_CASE(int, kULong, ULong)
-                    SIMPLE_CAST_CASE(int, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(int,kChar,char,saturate)
+                DOWN_CAST_CASE(int,kUChar,uchar,saturate)
+                DOWN_CAST_CASE(int,kUnsignedChar,uchar,saturate)
+                DOWN_CAST_CASE(int,kShort,short,saturate)
+                DOWN_CAST_CASE(int,kUShort,ushort,saturate)
+                DOWN_CAST_CASE(int,kUnsignedShort,ushort,saturate)
+                DOWN_CAST_CASE(int,kUInt,uint,saturate)
+                DOWN_CAST_CASE(int,kUnsignedInt,uint,saturate)
+                SIMPLE_CAST_CASE(int,kLong,Long)
+                SIMPLE_CAST_CASE(int,kULong,ULong)
+                SIMPLE_CAST_CASE(int,kUnsignedLong,ULong)
-                    TO_FLOAT_CASE(int)
-                    TO_DOUBLE_CASE(int)
+                TO_FLOAT_CASE(int)
+                TO_DOUBLE_CASE(int)
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
         case kUInt:
             uintPtr = (uint *)inRaw;
-            switch (outType)
+            switch( outType )
                 case kUInt:
                 case kUnsignedInt:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
-                    DOWN_CAST_CASE(uint, kChar, char, saturate)
-                    DOWN_CAST_CASE(uint, kUChar, uchar, saturate)
-                    DOWN_CAST_CASE(uint, kUnsignedChar, uchar, saturate)
-                    DOWN_CAST_CASE(uint, kShort, short, saturate)
-                    DOWN_CAST_CASE(uint, kUShort, ushort, saturate)
-                    DOWN_CAST_CASE(uint, kUnsignedShort, ushort, saturate)
-                    DOWN_CAST_CASE(uint, kInt, int, saturate)
-                    SIMPLE_CAST_CASE(uint, kLong, Long)
-                    SIMPLE_CAST_CASE(uint, kULong, ULong)
-                    SIMPLE_CAST_CASE(uint, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(uint,kChar,char,saturate)
+                DOWN_CAST_CASE(uint,kUChar,uchar,saturate)
+                DOWN_CAST_CASE(uint,kUnsignedChar,uchar,saturate)
+                DOWN_CAST_CASE(uint,kShort,short,saturate)
+                DOWN_CAST_CASE(uint,kUShort,ushort,saturate)
+                DOWN_CAST_CASE(uint,kUnsignedShort,ushort,saturate)
+                DOWN_CAST_CASE(uint,kInt,int,saturate)
+                SIMPLE_CAST_CASE(uint,kLong,Long)
+                SIMPLE_CAST_CASE(uint,kULong,ULong)
+                SIMPLE_CAST_CASE(uint,kUnsignedLong,ULong)
-                    TO_FLOAT_CASE(uint)
-                    TO_DOUBLE_CASE(uint)
+                TO_FLOAT_CASE(uint)
+                TO_DOUBLE_CASE(uint)
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
         case kUnsignedInt:
             uintPtr = (uint *)inRaw;
-            switch (outType)
+            switch( outType )
                 case kUInt:
                 case kUnsignedInt:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
-                    DOWN_CAST_CASE(uint, kChar, char, saturate)
-                    DOWN_CAST_CASE(uint, kUChar, uchar, saturate)
-                    DOWN_CAST_CASE(uint, kUnsignedChar, uchar, saturate)
-                    DOWN_CAST_CASE(uint, kShort, short, saturate)
-                    DOWN_CAST_CASE(uint, kUShort, ushort, saturate)
-                    DOWN_CAST_CASE(uint, kUnsignedShort, ushort, saturate)
-                    DOWN_CAST_CASE(uint, kInt, int, saturate)
-                    SIMPLE_CAST_CASE(uint, kLong, Long)
-                    SIMPLE_CAST_CASE(uint, kULong, ULong)
-                    SIMPLE_CAST_CASE(uint, kUnsignedLong, ULong)
+                DOWN_CAST_CASE(uint,kChar,char,saturate)
+                DOWN_CAST_CASE(uint,kUChar,uchar,saturate)
+                DOWN_CAST_CASE(uint,kUnsignedChar,uchar,saturate)
+                DOWN_CAST_CASE(uint,kShort,short,saturate)
+                DOWN_CAST_CASE(uint,kUShort,ushort,saturate)
+                DOWN_CAST_CASE(uint,kUnsignedShort,ushort,saturate)
+                DOWN_CAST_CASE(uint,kInt,int,saturate)
+                SIMPLE_CAST_CASE(uint,kLong,Long)
+                SIMPLE_CAST_CASE(uint,kULong,ULong)
+                SIMPLE_CAST_CASE(uint,kUnsignedLong,ULong)
-                    TO_FLOAT_CASE(uint)
-                    TO_DOUBLE_CASE(uint)
+                TO_FLOAT_CASE(uint)
+                TO_DOUBLE_CASE(uint)
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
         case kLong:
             LongPtr = (Long *)inRaw;
-            switch (outType)
+            switch( outType )
                 case kLong:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
-                    DOWN_CAST_CASE(Long, kChar, char, saturate)
-                    DOWN_CAST_CASE(Long, kUChar, uchar, saturate)
-                    DOWN_CAST_CASE(Long, kUnsignedChar, uchar, saturate)
-                    DOWN_CAST_CASE(Long, kShort, short, saturate)
-                    DOWN_CAST_CASE(Long, kUShort, ushort, saturate)
-                    DOWN_CAST_CASE(Long, kUnsignedShort, ushort, saturate)
-                    DOWN_CAST_CASE(Long, kInt, int, saturate)
-                    DOWN_CAST_CASE(Long, kUInt, uint, saturate)
-                    DOWN_CAST_CASE(Long, kUnsignedInt, uint, saturate)
-                    DOWN_CAST_CASE(Long, kULong, ULong, saturate)
-                    DOWN_CAST_CASE(Long, kUnsignedLong, ULong, saturate)
+                DOWN_CAST_CASE(Long,kChar,char,saturate)
+                DOWN_CAST_CASE(Long,kUChar,uchar,saturate)
+                DOWN_CAST_CASE(Long,kUnsignedChar,uchar,saturate)
+                DOWN_CAST_CASE(Long,kShort,short,saturate)
+                DOWN_CAST_CASE(Long,kUShort,ushort,saturate)
+                DOWN_CAST_CASE(Long,kUnsignedShort,ushort,saturate)
+                DOWN_CAST_CASE(Long,kInt,int,saturate)
+                DOWN_CAST_CASE(Long,kUInt,uint,saturate)
+                DOWN_CAST_CASE(Long,kUnsignedInt,uint,saturate)
+                DOWN_CAST_CASE(Long,kULong,ULong,saturate)
+                DOWN_CAST_CASE(Long,kUnsignedLong,ULong,saturate)
-                    TO_FLOAT_CASE(Long)
-                    TO_DOUBLE_CASE(Long)
+                TO_FLOAT_CASE(Long)
+                TO_DOUBLE_CASE(Long)
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
         case kULong:
             ULongPtr = (ULong *)inRaw;
-            switch (outType)
+            switch( outType )
                 case kUnsignedLong:
                 case kULong:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
-                    U_DOWN_CAST_CASE(ULong, kChar, char, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUChar, uchar, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUnsignedChar, uchar, saturate)
-                    U_DOWN_CAST_CASE(ULong, kShort, short, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUShort, ushort, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUnsignedShort, ushort, saturate)
-                    U_DOWN_CAST_CASE(ULong, kInt, int, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUInt, uint, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUnsignedInt, uint, saturate)
-                    U_DOWN_CAST_CASE(ULong, kLong, Long, saturate)
+                U_DOWN_CAST_CASE(ULong,kChar,char,saturate)
+                U_DOWN_CAST_CASE(ULong,kUChar,uchar,saturate)
+                U_DOWN_CAST_CASE(ULong,kUnsignedChar,uchar,saturate)
+                U_DOWN_CAST_CASE(ULong,kShort,short,saturate)
+                U_DOWN_CAST_CASE(ULong,kUShort,ushort,saturate)
+                U_DOWN_CAST_CASE(ULong,kUnsignedShort,ushort,saturate)
+                U_DOWN_CAST_CASE(ULong,kInt,int,saturate)
+                U_DOWN_CAST_CASE(ULong,kUInt,uint,saturate)
+                U_DOWN_CAST_CASE(ULong,kUnsignedInt,uint,saturate)
+                U_DOWN_CAST_CASE(ULong,kLong,Long,saturate)
-                    TO_FLOAT_CASE(ULong)
-                    TO_DOUBLE_CASE(ULong)
+                TO_FLOAT_CASE(ULong)
+                TO_DOUBLE_CASE(ULong)
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
         case kUnsignedLong:
             ULongPtr = (ULong *)inRaw;
-            switch (outType)
+            switch( outType )
                 case kULong:
                 case kUnsignedLong:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
-                    U_DOWN_CAST_CASE(ULong, kChar, char, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUChar, uchar, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUnsignedChar, uchar, saturate)
-                    U_DOWN_CAST_CASE(ULong, kShort, short, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUShort, ushort, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUnsignedShort, ushort, saturate)
-                    U_DOWN_CAST_CASE(ULong, kInt, int, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUInt, uint, saturate)
-                    U_DOWN_CAST_CASE(ULong, kUnsignedInt, uint, saturate)
-                    U_DOWN_CAST_CASE(ULong, kLong, Long, saturate)
+                U_DOWN_CAST_CASE(ULong,kChar,char,saturate)
+                U_DOWN_CAST_CASE(ULong,kUChar,uchar,saturate)
+                U_DOWN_CAST_CASE(ULong,kUnsignedChar,uchar,saturate)
+                U_DOWN_CAST_CASE(ULong,kShort,short,saturate)
+                U_DOWN_CAST_CASE(ULong,kUShort,ushort,saturate)
+                U_DOWN_CAST_CASE(ULong,kUnsignedShort,ushort,saturate)
+                U_DOWN_CAST_CASE(ULong,kInt,int,saturate)
+                U_DOWN_CAST_CASE(ULong,kUInt,uint,saturate)
+                U_DOWN_CAST_CASE(ULong,kUnsignedInt,uint,saturate)
+                U_DOWN_CAST_CASE(ULong,kLong,Long,saturate)
-                    TO_FLOAT_CASE(ULong)
-                    TO_DOUBLE_CASE(ULong)
+                TO_FLOAT_CASE(ULong)
+                TO_DOUBLE_CASE(ULong)
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
         case kFloat:
             floatPtr = (float *)inRaw;
-            switch (outType)
+            switch( outType )
-                FLOAT_ROUND_CASE(kChar, char, roundType, saturate)
-                FLOAT_ROUND_CASE(kUChar, uchar, roundType, saturate)
-                FLOAT_ROUND_CASE(kUnsignedChar, uchar, roundType, saturate)
-                FLOAT_ROUND_CASE(kShort, short, roundType, saturate)
-                FLOAT_ROUND_CASE(kUShort, ushort, roundType, saturate)
-                FLOAT_ROUND_CASE(kUnsignedShort, ushort, roundType, saturate)
-                FLOAT_ROUND_CASE(kInt, int, roundType, saturate)
-                FLOAT_ROUND_CASE(kUInt, uint, roundType, saturate)
-                FLOAT_ROUND_CASE(kUnsignedInt, uint, roundType, saturate)
-                FLOAT_ROUND_CASE(kLong, Long, roundType, saturate)
-                FLOAT_ROUND_CASE(kULong, ULong, roundType, saturate)
-                FLOAT_ROUND_CASE(kUnsignedLong, ULong, roundType, saturate)
+                FLOAT_ROUND_CASE(kChar,char,roundType,saturate)
+                FLOAT_ROUND_CASE(kUChar,uchar,roundType,saturate)
+                FLOAT_ROUND_CASE(kUnsignedChar,uchar,roundType,saturate)
+                FLOAT_ROUND_CASE(kShort,short,roundType,saturate)
+                FLOAT_ROUND_CASE(kUShort,ushort,roundType,saturate)
+                FLOAT_ROUND_CASE(kUnsignedShort,ushort,roundType,saturate)
+                FLOAT_ROUND_CASE(kInt,int,roundType,saturate)
+                FLOAT_ROUND_CASE(kUInt,uint,roundType,saturate)
+                FLOAT_ROUND_CASE(kUnsignedInt,uint,roundType,saturate)
+                FLOAT_ROUND_CASE(kLong,Long,roundType,saturate)
+                FLOAT_ROUND_CASE(kULong,ULong,roundType,saturate)
+                FLOAT_ROUND_CASE(kUnsignedLong,ULong,roundType,saturate)
                 case kFloat:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
-                    TO_DOUBLE_CASE(float);
+                TO_DOUBLE_CASE(float);
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
         case kDouble:
             doublePtr = (double *)inRaw;
-            switch (outType)
+            switch( outType )
-                DOUBLE_ROUND_CASE(kChar, char, roundType, saturate)
-                DOUBLE_ROUND_CASE(kUChar, uchar, roundType, saturate)
-                DOUBLE_ROUND_CASE(kUnsignedChar, uchar, roundType, saturate)
-                DOUBLE_ROUND_CASE(kShort, short, roundType, saturate)
-                DOUBLE_ROUND_CASE(kUShort, ushort, roundType, saturate)
-                DOUBLE_ROUND_CASE(kUnsignedShort, ushort, roundType, saturate)
-                DOUBLE_ROUND_CASE(kInt, int, roundType, saturate)
-                DOUBLE_ROUND_CASE(kUInt, uint, roundType, saturate)
-                DOUBLE_ROUND_CASE(kUnsignedInt, uint, roundType, saturate)
-                DOUBLE_ROUND_CASE(kLong, Long, roundType, saturate)
-                DOUBLE_ROUND_CASE(kULong, ULong, roundType, saturate)
-                DOUBLE_ROUND_CASE(kUnsignedLong, ULong, roundType, saturate)
+                DOUBLE_ROUND_CASE(kChar,char,roundType,saturate)
+                DOUBLE_ROUND_CASE(kUChar,uchar,roundType,saturate)
+                DOUBLE_ROUND_CASE(kUnsignedChar,uchar,roundType,saturate)
+                DOUBLE_ROUND_CASE(kShort,short,roundType,saturate)
+                DOUBLE_ROUND_CASE(kUShort,ushort,roundType,saturate)
+                DOUBLE_ROUND_CASE(kUnsignedShort,ushort,roundType,saturate)
+                DOUBLE_ROUND_CASE(kInt,int,roundType,saturate)
+                DOUBLE_ROUND_CASE(kUInt,uint,roundType,saturate)
+                DOUBLE_ROUND_CASE(kUnsignedInt,uint,roundType,saturate)
+                DOUBLE_ROUND_CASE(kLong,Long,roundType,saturate)
+                DOUBLE_ROUND_CASE(kULong,ULong,roundType,saturate)
+                DOUBLE_ROUND_CASE(kUnsignedLong,ULong,roundType,saturate)
                 case kDouble:
-                    memcpy(outRaw, inRaw, get_explicit_type_size(inType));
+                    memcpy( outRaw, inRaw, get_explicit_type_size( inType ) );
-                    log_error("ERROR: Invalid type given to "
-                              "convert_explicit_value!!\n");
+                    log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
-            log_error(
-                "ERROR: Invalid type given to convert_explicit_value!!\n");
+            log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" );
-void generate_random_data(ExplicitType type, size_t count, MTdata d,
-                          void *outData)
+void generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData )
     bool *boolPtr;
     cl_char *charPtr;
@@ -916,93 +880,88 @@
     cl_ulong *ulongPtr;
     cl_float *floatPtr;
     cl_double *doublePtr;
-    cl_half *halfPtr;
+    cl_ushort *halfPtr;
     size_t i;
     cl_uint bits = genrand_int32(d);
     cl_uint bitsLeft = 32;
-    switch (type)
+    switch( type )
         case kBool:
             boolPtr = (bool *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
-                if (0 == bitsLeft)
+                if( 0 == bitsLeft)
                     bits = genrand_int32(d);
                     bitsLeft = 32;
-                boolPtr[i] = (bits & 1) ? true : false;
-                bits >>= 1;
-                bitsLeft -= 1;
+                boolPtr[i] = ( bits & 1 ) ? true : false;
+                bits >>= 1; bitsLeft -= 1;
         case kChar:
             charPtr = (cl_char *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
-                if (0 == bitsLeft)
+                if( 0 == bitsLeft)
                     bits = genrand_int32(d);
                     bitsLeft = 32;
-                charPtr[i] = (cl_char)((cl_int)(bits & 255) - 127);
-                bits >>= 8;
-                bitsLeft -= 8;
+                charPtr[i] = (cl_char)( (cl_int)(bits & 255 ) - 127 );
+                bits >>= 8; bitsLeft -= 8;
         case kUChar:
         case kUnsignedChar:
             ucharPtr = (cl_uchar *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
-                if (0 == bitsLeft)
+                if( 0 == bitsLeft)
                     bits = genrand_int32(d);
                     bitsLeft = 32;
-                ucharPtr[i] = (cl_uchar)(bits & 255);
-                bits >>= 8;
-                bitsLeft -= 8;
+                ucharPtr[i] = (cl_uchar)( bits & 255 );
+                bits >>= 8; bitsLeft -= 8;
         case kShort:
             shortPtr = (cl_short *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
-                if (0 == bitsLeft)
+                if( 0 == bitsLeft)
                     bits = genrand_int32(d);
                     bitsLeft = 32;
-                shortPtr[i] = (cl_short)((cl_int)(bits & 65535) - 32767);
-                bits >>= 16;
-                bitsLeft -= 16;
+                shortPtr[i] = (cl_short)( (cl_int)( bits & 65535 ) - 32767 );
+                bits >>= 16; bitsLeft -= 16;
         case kUShort:
         case kUnsignedShort:
             ushortPtr = (cl_ushort *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
-                if (0 == bitsLeft)
+                if( 0 == bitsLeft)
                     bits = genrand_int32(d);
                     bitsLeft = 32;
-                ushortPtr[i] = (cl_ushort)((cl_int)(bits & 65535));
-                bits >>= 16;
-                bitsLeft -= 16;
+                ushortPtr[i] = (cl_ushort)( (cl_int)( bits & 65535 ) );
+                bits >>= 16; bitsLeft -= 16;
         case kInt:
             intPtr = (cl_int *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
                 intPtr[i] = (cl_int)genrand_int32(d);
@@ -1011,7 +970,7 @@
         case kUInt:
         case kUnsignedInt:
             uintPtr = (cl_uint *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
                 uintPtr[i] = (unsigned int)genrand_int32(d);
@@ -1019,136 +978,157 @@
         case kLong:
             longPtr = (cl_long *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
-                longPtr[i] = (cl_long)genrand_int32(d)
-                    | ((cl_long)genrand_int32(d) << 32);
+                longPtr[i] = (cl_long)genrand_int32(d) | ( (cl_long)genrand_int32(d) << 32 );
         case kULong:
         case kUnsignedLong:
             ulongPtr = (cl_ulong *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
-                ulongPtr[i] = (cl_ulong)genrand_int32(d)
-                    | ((cl_ulong)genrand_int32(d) << 32);
+                ulongPtr[i] = (cl_ulong)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 );
         case kFloat:
             floatPtr = (cl_float *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
                 // [ -(double) 0x7fffffff, (double) 0x7fffffff ]
                 double t = genrand_real1(d);
-                floatPtr[i] = (float)((1.0 - t) * -(double)0x7fffffff
-                                      + t * (double)0x7fffffff);
+                floatPtr[i] = (float) ((1.0 - t) * -(double) 0x7fffffff + t * (double) 0x7fffffff);
         case kDouble:
             doublePtr = (cl_double *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
-                cl_long u = (cl_long)genrand_int32(d)
-                    | ((cl_long)genrand_int32(d) << 32);
-                double t = (double)u;
-                // scale [-2**63, 2**63] to [-2**31, 2**31]
-                t *= MAKE_HEX_DOUBLE(0x1.0p-32, 0x1, -32);
+                cl_long u = (cl_long)genrand_int32(d) | ( (cl_long)genrand_int32(d) << 32 );
+                double t = (double) u;
+                t *= MAKE_HEX_DOUBLE( 0x1.0p-32, 0x1, -32 );        // scale [-2**63, 2**63] to [-2**31, 2**31]
                 doublePtr[i] = t;
         case kHalf:
             halfPtr = (ushort *)outData;
-            for (i = 0; i < count; i++)
+            for( i = 0; i < count; i++ )
-                if (0 == bitsLeft)
+                if( 0 == bitsLeft)
                     bits = genrand_int32(d);
                     bitsLeft = 32;
-                halfPtr[i] =
-                    bits & 65535; /* Kindly generates random bits for us */
-                bits >>= 16;
-                bitsLeft -= 16;
+                halfPtr[i] = bits & 65535;     /* Kindly generates random bits for us */
+                bits >>= 16; bitsLeft -= 16;
-            log_error(
-                "ERROR: Invalid type passed in to generate_random_data!\n");
+            log_error( "ERROR: Invalid type passed in to generate_random_data!\n" );
-void *create_random_data(ExplicitType type, MTdata d, size_t count)
+void * create_random_data( ExplicitType type, MTdata d, size_t count )
-    void *data = malloc(get_explicit_type_size(type) * count);
-    generate_random_data(type, count, d, data);
+    void *data = malloc( get_explicit_type_size( type ) * count );
+    generate_random_data( type, count, d, data );
     return data;
-cl_long read_upscale_signed(void *inRaw, ExplicitType inType)
+cl_long read_upscale_signed( void *inRaw, ExplicitType inType )
-    switch (inType)
+    switch( inType )
-        case kChar: return (cl_long)(*((cl_char *)inRaw));
+        case kChar:
+            return (cl_long)( *( (cl_char *)inRaw ) );
         case kUChar:
-        case kUnsignedChar: return (cl_long)(*((cl_uchar *)inRaw));
-        case kShort: return (cl_long)(*((cl_short *)inRaw));
+        case kUnsignedChar:
+            return (cl_long)( *( (cl_uchar *)inRaw ) );
+        case kShort:
+            return (cl_long)( *( (cl_short *)inRaw ) );
         case kUShort:
-        case kUnsignedShort: return (cl_long)(*((cl_ushort *)inRaw));
-        case kInt: return (cl_long)(*((cl_int *)inRaw));
+        case kUnsignedShort:
+            return (cl_long)( *( (cl_ushort *)inRaw ) );
+        case kInt:
+            return (cl_long)( *( (cl_int *)inRaw ) );
         case kUInt:
-        case kUnsignedInt: return (cl_long)(*((cl_uint *)inRaw));
-        case kLong: return (cl_long)(*((cl_long *)inRaw));
+        case kUnsignedInt:
+            return (cl_long)( *( (cl_uint *)inRaw ) );
+        case kLong:
+            return (cl_long)( *( (cl_long *)inRaw ) );
         case kULong:
-        case kUnsignedLong: return (cl_long)(*((cl_ulong *)inRaw));
-        default: return 0;
+        case kUnsignedLong:
+            return (cl_long)( *( (cl_ulong *)inRaw ) );
+        default:
+            return 0;
-cl_ulong read_upscale_unsigned(void *inRaw, ExplicitType inType)
+cl_ulong read_upscale_unsigned( void *inRaw, ExplicitType inType )
-    switch (inType)
+    switch( inType )
-        case kChar: return (cl_ulong)(*((cl_char *)inRaw));
+        case kChar:
+            return (cl_ulong)( *( (cl_char *)inRaw ) );
         case kUChar:
-        case kUnsignedChar: return (cl_ulong)(*((cl_uchar *)inRaw));
-        case kShort: return (cl_ulong)(*((cl_short *)inRaw));
+        case kUnsignedChar:
+            return (cl_ulong)( *( (cl_uchar *)inRaw ) );
+        case kShort:
+            return (cl_ulong)( *( (cl_short *)inRaw ) );
         case kUShort:
-        case kUnsignedShort: return (cl_ulong)(*((cl_ushort *)inRaw));
-        case kInt: return (cl_ulong)(*((cl_int *)inRaw));
+        case kUnsignedShort:
+            return (cl_ulong)( *( (cl_ushort *)inRaw ) );
+        case kInt:
+            return (cl_ulong)( *( (cl_int *)inRaw ) );
         case kUInt:
-        case kUnsignedInt: return (cl_ulong)(*((cl_uint *)inRaw));
-        case kLong: return (cl_ulong)(*((cl_long *)inRaw));
+        case kUnsignedInt:
+            return (cl_ulong)( *( (cl_uint *)inRaw ) );
+        case kLong:
+            return (cl_ulong)( *( (cl_long *)inRaw ) );
         case kULong:
-        case kUnsignedLong: return (cl_ulong)(*((cl_ulong *)inRaw));
-        default: return 0;
+        case kUnsignedLong:
+            return (cl_ulong)( *( (cl_ulong *)inRaw ) );
+        default:
+            return 0;
-float read_as_float(void *inRaw, ExplicitType inType)
+float read_as_float( void *inRaw, ExplicitType inType )
-    switch (inType)
+    switch( inType )
-        case kChar: return (float)(*((cl_char *)inRaw));
+        case kChar:
+            return (float)( *( (cl_char *)inRaw ) );
         case kUChar:
-        case kUnsignedChar: return (float)(*((cl_char *)inRaw));
-        case kShort: return (float)(*((cl_short *)inRaw));
+        case kUnsignedChar:
+            return (float)( *( (cl_char *)inRaw ) );
+        case kShort:
+            return (float)( *( (cl_short *)inRaw ) );
         case kUShort:
-        case kUnsignedShort: return (float)(*((cl_ushort *)inRaw));
-        case kInt: return (float)(*((cl_int *)inRaw));
+        case kUnsignedShort:
+            return (float)( *( (cl_ushort *)inRaw ) );
+        case kInt:
+            return (float)( *( (cl_int *)inRaw ) );
         case kUInt:
-        case kUnsignedInt: return (float)(*((cl_uint *)inRaw));
-        case kLong: return (float)(*((cl_long *)inRaw));
+        case kUnsignedInt:
+            return (float)( *( (cl_uint *)inRaw ) );
+        case kLong:
+            return (float)( *( (cl_long *)inRaw ) );
         case kULong:
-        case kUnsignedLong: return (float)(*((cl_ulong *)inRaw));
-        case kFloat: return *((float *)inRaw);
-        case kDouble: return (float)*((double *)inRaw);
-        default: return 0;
+        case kUnsignedLong:
+            return (float)( *( (cl_ulong *)inRaw ) );
+        case kFloat:
+            return *( (float *)inRaw );
+        case kDouble:
+            return (float) *( (double*)inRaw );
+        default:
+            return 0;
@@ -1160,60 +1140,59 @@
 double get_random_double(double low, double high, MTdata d)
-    cl_ulong u =
-        (cl_ulong)genrand_int32(d) | ((cl_ulong)genrand_int32(d) << 32);
-    double t = (double)u * MAKE_HEX_DOUBLE(0x1.0p-64, 0x1, -64);
+    cl_ulong u = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32 );
+    double t = (double) u * MAKE_HEX_DOUBLE( 0x1.0p-64, 0x1, -64);
     return (1.0f - t) * low + t * high;
-float any_float(MTdata d)
+float  any_float( MTdata d )
-    union {
-        float f;
+    union
+    {
+        float   f;
         cl_uint u;
-    } u;
+    }u;
     u.u = genrand_int32(d);
     return u.f;
-double any_double(MTdata d)
+double  any_double( MTdata d )
-    union {
-        double f;
+    union
+    {
+        double   f;
         cl_ulong u;
-    } u;
+    }u;
-    u.u = (cl_ulong)genrand_int32(d) | ((cl_ulong)genrand_int32(d) << 32);
+    u.u = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
     return u.f;
-int random_in_range(int minV, int maxV, MTdata d)
+int          random_in_range( int minV, int maxV, MTdata d )
-    cl_ulong r = ((cl_ulong)genrand_int32(d)) * (maxV - minV + 1);
+    cl_ulong r = ((cl_ulong) genrand_int32(d) ) * (maxV - minV + 1);
     return (cl_uint)(r >> 32) + minV;
 size_t get_random_size_t(size_t low, size_t high, MTdata d)
-    enum
-    {
-        N = sizeof(size_t) / sizeof(int)
-    };
+  enum { N = sizeof(size_t)/sizeof(int) };
-    union {
-        int word[N];
-        size_t size;
-    } u;
+  union {
+    int word[N];
+    size_t size;
+  } u;
-    for (unsigned i = 0; i != N; ++i)
-    {
-        u.word[i] = genrand_int32(d);
-    }
+  for (unsigned i=0; i != N; ++i) {
+    u.word[i] = genrand_int32(d);
+  }
-    assert(low <= high && "Invalid random number range specified");
-    size_t range = high - low;
+  assert(low <= high && "Invalid random number range specified");
+  size_t range = high - low;
-    return (range) ? low + ((u.size - low) % range) : low;
+  return (range) ? low + ((u.size - low) % range) : low;
diff --git a/test_common/harness/conversions.h b/test_common/harness/conversions.h
index e6880e0..50f2838 100644
--- a/test_common/harness/conversions.h
+++ b/test_common/harness/conversions.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -29,7 +29,7 @@
 enum ExplicitTypes
-    kBool = 0,
+    kBool        = 0,
@@ -48,7 +48,7 @@
-typedef enum ExplicitTypes ExplicitType;
+typedef enum ExplicitTypes    ExplicitType;
 enum RoundingTypes
@@ -63,72 +63,61 @@
     kDefaultRoundingType = kRoundToNearest
-typedef enum RoundingTypes RoundingType;
+typedef enum RoundingTypes    RoundingType;
-extern void print_type_to_string(ExplicitType type, void *data, char *string);
-extern size_t get_explicit_type_size(ExplicitType type);
-extern const char *get_explicit_type_name(ExplicitType type);
-extern void convert_explicit_value(void *inRaw, void *outRaw,
-                                   ExplicitType inType, bool saturate,
-                                   RoundingType roundType,
-                                   ExplicitType outType);
+extern void             print_type_to_string(ExplicitType type, void *data, char* string);
+extern size_t           get_explicit_type_size( ExplicitType type );
+extern const char *     get_explicit_type_name( ExplicitType type );
+extern void             convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType );
-extern void generate_random_data(ExplicitType type, size_t count, MTdata d,
-                                 void *outData);
-extern void *create_random_data(ExplicitType type, MTdata d, size_t count);
+extern void             generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData );
+extern void    *         create_random_data( ExplicitType type, MTdata d, size_t count );
-extern cl_long read_upscale_signed(void *inRaw, ExplicitType inType);
-extern cl_ulong read_upscale_unsigned(void *inRaw, ExplicitType inType);
-extern float read_as_float(void *inRaw, ExplicitType inType);
+extern cl_long          read_upscale_signed( void *inRaw, ExplicitType inType );
+extern cl_ulong         read_upscale_unsigned( void *inRaw, ExplicitType inType );
+extern float            read_as_float( void *inRaw, ExplicitType inType );
-extern float get_random_float(float low, float high, MTdata d);
-extern double get_random_double(double low, double high, MTdata d);
-extern float any_float(MTdata d);
-extern double any_double(MTdata d);
+extern float            get_random_float(float low, float high, MTdata d);
+extern double           get_random_double(double low, double high, MTdata d);
+extern float            any_float( MTdata d );
+extern double           any_double( MTdata d );
-extern int random_in_range(int minV, int maxV, MTdata d);
+extern int              random_in_range( int minV, int maxV, MTdata d );
 size_t get_random_size_t(size_t low, size_t high, MTdata d);
 // Note: though this takes a double, this is for use with single precision tests
-static inline int IsFloatSubnormal(float x)
+static inline int IsFloatSubnormal( float x )
 #if 2 == FLT_RADIX
     // Do this in integer to avoid problems with FTZ behavior
-    union {
-        float d;
-        uint32_t u;
-    } u;
+    union{ float d; uint32_t u;}u;
     u.d = fabsf(x);
-    return (u.u - 1) < 0x007fffffU;
+    return (u.u-1) < 0x007fffffU;
-    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware --
-    // will fail if you flush subnormals to zero
-    return fabs(x) < (double)FLT_MIN && x != 0.0;
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return fabs(x) < (double) FLT_MIN && x != 0.0;
-static inline int IsDoubleSubnormal(double x)
+static inline int IsDoubleSubnormal( double x )
 #if 2 == FLT_RADIX
     // Do this in integer to avoid problems with FTZ behavior
-    union {
-        double d;
-        uint64_t u;
-    } u;
-    u.d = fabs(x);
-    return (u.u - 1) < 0x000fffffffffffffULL;
+    union{ double d; uint64_t u;}u;
+    u.d = fabs( x);
+    return (u.u-1) < 0x000fffffffffffffULL;
-    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware --
-    // will fail if you flush subnormals to zero
-    return fabs(x) < (double)DBL_MIN && x != 0.0;
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return fabs(x) < (double) DBL_MIN && x != 0.0;
-static inline int IsHalfSubnormal(cl_half x)
-    // this relies on interger overflow to exclude 0 as a subnormal
-    return ((x & 0x7fffU) - 1U) < 0x03ffU;
+static inline int IsHalfSubnormal( cl_half x )
+    return ( ( x & 0x7fffU ) - 1U ) < 0x03ffU; 
 #endif // _conversions_h
diff --git a/test_common/harness/crc32.h b/test_common/harness/crc32.h
index 65ca15e..1913063 100644
--- a/test_common/harness/crc32.h
+++ b/test_common/harness/crc32.h
@@ -18,7 +18,7 @@
 #ifndef _CRC32_H_
 #define _CRC32_H_
-#include <stdint.h>
+#include <stdint.h> 
 #include <stddef.h>
 uint32_t crc32(const void *buf, size_t size);
diff --git a/test_common/harness/deviceInfo.cpp b/test_common/harness/deviceInfo.cpp
index 287a142..a5b0a58 100644
--- a/test_common/harness/deviceInfo.cpp
+++ b/test_common/harness/deviceInfo.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017-2019 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -24,14 +24,14 @@
 /* Helper to return a string containing device information for the specified
  * device info parameter. */
-std::string get_device_info_string(cl_device_id device,
-                                   cl_device_info param_name)
+static std::string get_device_info_string(cl_device_id device,
+                                          cl_device_info param_name)
     size_t size = 0;
     int err;
     if ((err = clGetDeviceInfo(device, param_name, 0, NULL, &size))
-            != CL_SUCCESS
+        != CL_SUCCESS
         || size == 0)
         throw std::runtime_error("clGetDeviceInfo failed\n");
@@ -45,8 +45,7 @@
         throw std::runtime_error("clGetDeviceInfo failed\n");
-    /* The returned string does not include the null terminator. */
-    return std::string(, size - 1);
+    return std::string(info.begin(), info.end());
 /* Determines if an extension is supported by a device. */
@@ -80,21 +79,3 @@
     return get_device_info_string(device, CL_DEVICE_VERSION);
-/* Returns a string containing the device name. */
-std::string get_device_name(cl_device_id device)
-    return get_device_info_string(device, CL_DEVICE_NAME);
-size_t get_max_param_size(cl_device_id device)
-    size_t ret(0);
-    if (clGetDeviceInfo(device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(ret), &ret,
-                        nullptr)
-        != CL_SUCCESS)
-    {
-        throw std::runtime_error("clGetDeviceInfo failed\n");
-    }
-    return ret;
diff --git a/test_common/harness/deviceInfo.h b/test_common/harness/deviceInfo.h
index f8c5580..d4432ea 100644
--- a/test_common/harness/deviceInfo.h
+++ b/test_common/harness/deviceInfo.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017-2019 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -23,11 +23,6 @@
 #include <CL/opencl.h>
-/* Helper to return a string containing device information for the specified
- * device info parameter. */
-std::string get_device_info_string(cl_device_id device,
-                                   cl_device_info param_name);
 /* Determines if an extension is supported by a device. */
 int is_extension_available(cl_device_id device, const char *extensionName);
@@ -40,10 +35,4 @@
 /* Returns a string containing the supported OpenCL version for a device. */
 std::string get_device_version_string(cl_device_id device);
-/* Returns a string containing the device name. */
-std::string get_device_name(cl_device_id device);
-// Returns the maximum size in bytes for Kernel Parameters
-size_t get_max_param_size(cl_device_id device);
 #endif // _deviceInfo_h
diff --git a/test_common/harness/errorHelpers.cpp b/test_common/harness/errorHelpers.cpp
index 22a2677..343be3f 100644
--- a/test_common/harness/errorHelpers.cpp
+++ b/test_common/harness/errorHelpers.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -22,105 +22,87 @@
 #include "parseParameters.h"
-#include <CL/cl_half.h>
-const char *IGetErrorString(int clErrorCode)
+const char    *IGetErrorString( int clErrorCode )
-    switch (clErrorCode)
+    switch( clErrorCode )
-        case CL_SUCCESS: return "CL_SUCCESS";
-        case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
-        case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
-        case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
-            return "CL_PROFILING_INFO_NOT_AVAILABLE";
-        case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP";
-            return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
+        case CL_SUCCESS:                return "CL_SUCCESS";
+        case CL_DEVICE_NOT_FOUND:        return "CL_DEVICE_NOT_FOUND";
+        case CL_OUT_OF_RESOURCES:        return "CL_OUT_OF_RESOURCES";
+        case CL_OUT_OF_HOST_MEMORY:        return "CL_OUT_OF_HOST_MEMORY";
+        case CL_MEM_COPY_OVERLAP:        return "CL_MEM_COPY_OVERLAP";
-        case CL_MAP_FAILURE: return "CL_MAP_FAILURE";
-            return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
+        case CL_MAP_FAILURE:            return "CL_MAP_FAILURE";
-            return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
-        case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
+        case CL_INVALID_VALUE:            return "CL_INVALID_VALUE";
-        case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
-        case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
-        case CL_INVALID_BINARY: return "CL_INVALID_BINARY";
-            return "CL_INVALID_PROGRAM_EXECUTABLE";
-            return "CL_INVALID_KERNEL_DEFINITION";
-        case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL";
-        case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE";
-        case CL_INVALID_EVENT: return "CL_INVALID_EVENT";
+        case CL_INVALID_DEVICE:            return "CL_INVALID_DEVICE";
+        case CL_INVALID_CONTEXT:        return "CL_INVALID_CONTEXT";
+        case CL_INVALID_HOST_PTR:    return "CL_INVALID_HOST_PTR";
+        case CL_INVALID_MEM_OBJECT:        return "CL_INVALID_MEM_OBJECT";
+        case CL_INVALID_IMAGE_SIZE:        return "CL_INVALID_IMAGE_SIZE";
+        case CL_INVALID_SAMPLER:        return "CL_INVALID_SAMPLER";
+        case CL_INVALID_BINARY:        return "CL_INVALID_BINARY";
+        case CL_INVALID_PROGRAM:        return "CL_INVALID_PROGRAM";
+        case CL_INVALID_KERNEL:            return "CL_INVALID_KERNEL";
+        case CL_INVALID_ARG_INDEX:        return "CL_INVALID_ARG_INDEX";
+        case CL_INVALID_ARG_VALUE:        return "CL_INVALID_ARG_VALUE";
+        case CL_INVALID_ARG_SIZE:        return "CL_INVALID_ARG_SIZE";
+        case CL_INVALID_EVENT:            return "CL_INVALID_EVENT";
+        case CL_INVALID_OPERATION:        return "CL_INVALID_OPERATION";
+        case CL_INVALID_GL_OBJECT:        return "CL_INVALID_GL_OBJECT";
+        case CL_INVALID_MIP_LEVEL:      return "CL_INVALID_MIP_LEVEL";
-        case CL_INVALID_SPEC_ID: return "CL_INVALID_SPEC_ID";
         default: return "(unknown)";
-const char *GetChannelOrderName(cl_channel_order order)
+const char *GetChannelOrderName( cl_channel_order order )
-    switch (order)
+    switch( order )
-        case CL_R: return "CL_R";
-        case CL_A: return "CL_A";
-        case CL_Rx: return "CL_Rx";
-        case CL_RG: return "CL_RG";
-        case CL_RA: return "CL_RA";
-        case CL_RGx: return "CL_RGx";
-        case CL_RGB: return "CL_RGB";
-        case CL_RGBx: return "CL_RGBx";
-        case CL_RGBA: return "CL_RGBA";
-        case CL_ARGB: return "CL_ARGB";
-        case CL_BGRA: return "CL_BGRA";
+        case CL_R:      return "CL_R";
+        case CL_A:      return "CL_A";
+        case CL_Rx:     return "CL_Rx";
+        case CL_RG:     return "CL_RG";
+        case CL_RA:     return "CL_RA";
+        case CL_RGx:    return "CL_RGx";
+        case CL_RGB:    return "CL_RGB";
+        case CL_RGBx:   return "CL_RGBx";
+        case CL_RGBA:      return "CL_RGBA";
+        case CL_ARGB:      return "CL_ARGB";
+        case CL_BGRA:      return "CL_BGRA";
         case CL_INTENSITY: return "CL_INTENSITY";
         case CL_LUMINANCE: return "CL_LUMINANCE";
 #if defined CL_1RGB_APPLE
@@ -143,9 +125,9 @@
-int IsChannelOrderSupported(cl_channel_order order)
+int IsChannelOrderSupported( cl_channel_order order )
-    switch (order)
+    switch( order )
         case CL_R:
         case CL_A:
@@ -165,47 +147,51 @@
         case CL_sRGBx:
         case CL_sBGRA:
         case CL_sRGBA:
-        case CL_DEPTH: return 1;
+        case CL_DEPTH:
+            return 1;
 #if defined CL_1RGB_APPLE
-        case CL_1RGB_APPLE: return 1;
+        case CL_1RGB_APPLE:
+            return 1;
 #if defined CL_BGR1_APPLE
-        case CL_BGR1_APPLE: return 1;
+        case CL_BGR1_APPLE:
+            return 1;
-        default: return 0;
+        default:
+            return 0;
-const char *GetChannelTypeName(cl_channel_type type)
+const char *GetChannelTypeName( cl_channel_type type )
-    switch (type)
+    switch( type )
-        case CL_SNORM_INT8: return "CL_SNORM_INT8";
-        case CL_SNORM_INT16: return "CL_SNORM_INT16";
-        case CL_UNORM_INT8: return "CL_UNORM_INT8";
-        case CL_UNORM_INT16: return "CL_UNORM_INT16";
-        case CL_UNORM_SHORT_565: return "CL_UNORM_SHORT_565";
-        case CL_UNORM_SHORT_555: return "CL_UNORM_SHORT_555";
-        case CL_UNORM_INT_101010: return "CL_UNORM_INT_101010";
-        case CL_SIGNED_INT8: return "CL_SIGNED_INT8";
-        case CL_SIGNED_INT16: return "CL_SIGNED_INT16";
-        case CL_SIGNED_INT32: return "CL_SIGNED_INT32";
-        case CL_UNSIGNED_INT8: return "CL_UNSIGNED_INT8";
-        case CL_UNSIGNED_INT16: return "CL_UNSIGNED_INT16";
-        case CL_UNSIGNED_INT32: return "CL_UNSIGNED_INT32";
-        case CL_HALF_FLOAT: return "CL_HALF_FLOAT";
-        case CL_FLOAT: return "CL_FLOAT";
+        case CL_SNORM_INT8:         return "CL_SNORM_INT8";
+        case CL_SNORM_INT16:        return "CL_SNORM_INT16";
+        case CL_UNORM_INT8:         return "CL_UNORM_INT8";
+        case CL_UNORM_INT16:        return "CL_UNORM_INT16";
+        case CL_UNORM_SHORT_565:    return "CL_UNORM_SHORT_565";
+        case CL_UNORM_SHORT_555:    return "CL_UNORM_SHORT_555";
+        case CL_UNORM_INT_101010:   return "CL_UNORM_INT_101010";
+        case CL_SIGNED_INT8:        return "CL_SIGNED_INT8";
+        case CL_SIGNED_INT16:       return "CL_SIGNED_INT16";
+        case CL_SIGNED_INT32:       return "CL_SIGNED_INT32";
+        case CL_UNSIGNED_INT8:      return "CL_UNSIGNED_INT8";
+        case CL_UNSIGNED_INT16:     return "CL_UNSIGNED_INT16";
+        case CL_UNSIGNED_INT32:     return "CL_UNSIGNED_INT32";
+        case CL_HALF_FLOAT:         return "CL_HALF_FLOAT";
+        case CL_FLOAT:              return "CL_FLOAT";
-        case CL_SFIXED14_APPLE: return "CL_SFIXED14_APPLE";
+        case CL_SFIXED14_APPLE:     return "CL_SFIXED14_APPLE";
-        case CL_UNORM_INT24: return "CL_UNORM_INT24";
-        default: return NULL;
+        case CL_UNORM_INT24:        return "CL_UNORM_INT24";
+        default:                    return NULL;
-int IsChannelTypeSupported(cl_channel_type type)
+int IsChannelTypeSupported( cl_channel_type type )
-    switch (type)
+    switch( type )
         case CL_SNORM_INT8:
         case CL_SNORM_INT16:
@@ -222,408 +208,440 @@
         case CL_UNSIGNED_INT16:
         case CL_UNSIGNED_INT32:
         case CL_HALF_FLOAT:
-        case CL_FLOAT: return 1;
+        case CL_FLOAT:
+            return 1;
-        case CL_SFIXED14_APPLE: return 1;
+        case CL_SFIXED14_APPLE:
+            return 1;
-        default: return 0;
+        default:
+            return 0;
-const char *GetAddressModeName(cl_addressing_mode mode)
+const char *GetAddressModeName( cl_addressing_mode mode )
-    switch (mode)
+    switch( mode )
-        case CL_ADDRESS_NONE: return "CL_ADDRESS_NONE";
-        case CL_ADDRESS_CLAMP: return "CL_ADDRESS_CLAMP";
-        case CL_ADDRESS_REPEAT: return "CL_ADDRESS_REPEAT";
-        default: return NULL;
+        case CL_ADDRESS_NONE:                return "CL_ADDRESS_NONE";
+        case CL_ADDRESS_CLAMP_TO_EDGE:        return "CL_ADDRESS_CLAMP_TO_EDGE";
+        case CL_ADDRESS_CLAMP:                return "CL_ADDRESS_CLAMP";
+        case CL_ADDRESS_REPEAT:                return "CL_ADDRESS_REPEAT";
+        default:                            return NULL;
-const char *GetDeviceTypeName(cl_device_type type)
+const char *GetDeviceTypeName( cl_device_type type )
-    switch (type)
+    switch( type )
-        case CL_DEVICE_TYPE_GPU: return "CL_DEVICE_TYPE_GPU";
-        case CL_DEVICE_TYPE_CPU: return "CL_DEVICE_TYPE_CPU";
-        case CL_DEVICE_TYPE_ALL: return "CL_DEVICE_TYPE_ALL";
-        default: return NULL;
+        case CL_DEVICE_TYPE_GPU:    return "CL_DEVICE_TYPE_GPU";
+        case CL_DEVICE_TYPE_CPU:    return "CL_DEVICE_TYPE_CPU";
+        case CL_DEVICE_TYPE_ALL:    return "CL_DEVICE_TYPE_ALL";
+        default:                    return NULL;
-const char *GetDataVectorString(void *dataBuffer, size_t typeSize,
-                                size_t vecSize, char *buffer)
+const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer )
-    static char scratch[1024];
+    static char scratch[ 1024 ];
     size_t i, j;
-    if (buffer == NULL) buffer = scratch;
+    if( buffer == NULL )
+        buffer = scratch;
     unsigned char *p = (unsigned char *)dataBuffer;
     char *bPtr;
-    buffer[0] = 0;
+    buffer[ 0 ] = 0;
     bPtr = buffer;
-    for (i = 0; i < vecSize; i++)
+    for( i = 0; i < vecSize; i++ )
-        if (i > 0)
+        if( i > 0 )
-            bPtr[0] = ' ';
+            bPtr[ 0 ] = ' ';
-        for (j = 0; j < typeSize; j++)
+        for( j = 0; j < typeSize; j++ )
-            sprintf(bPtr, "%02x", (unsigned int)p[typeSize - j - 1]);
+            sprintf( bPtr, "%02x", (unsigned int)p[ typeSize - j - 1 ] );
             bPtr += 2;
         p += typeSize;
-    bPtr[0] = 0;
+    bPtr[ 0 ] = 0;
     return buffer;
-const char *GetQueuePropertyName(cl_command_queue_properties property)
-    switch (property)
-    {
-        case CL_QUEUE_ON_DEVICE: return "CL_QUEUE_ON_DEVICE";
-        default: return "(unknown)";
-    }
 #ifndef MAX
-#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b))
+#define MAX( _a, _b )       ((_a) > (_b) ? (_a) : (_b))
-#if defined(_MSC_VER)
-#define scalbnf(_a, _i) ldexpf(_a, _i)
-#define scalbn(_a, _i) ldexp(_a, _i)
-#define scalbnl(_a, _i) ldexpl(_a, _i)
+#if defined( _MSC_VER )
+#define scalbnf(_a, _i )    ldexpf( _a, _i )
+#define scalbn(_a, _i )     ldexp( _a, _i )
+#define scalbnl(_a, _i )    ldexpl( _a, _i )
+static float Ulp_Error_Half_Float( float test, double reference );
+static inline float  half2float( cl_ushort half );
 // taken from math tests
-#define HALF_MIN_EXP -13
-#define HALF_MANT_DIG 11
-static float Ulp_Error_Half_Float(float test, double reference)
+#define HALF_MIN_EXP    -13
+#define HALF_MANT_DIG    11
+static float Ulp_Error_Half_Float( float test, double reference )
-    union {
-        double d;
-        uint64_t u;
-    } u;
-    u.d = reference;
+    union{ double d; uint64_t u; }u;     u.d = reference;
-    // Note: This function presumes that someone has already tested whether the
-    // result is correctly, rounded before calling this function.  That test:
+    // Note: This function presumes that someone has already tested whether the result is correctly,
+    // rounded before calling this function.  That test:
     //    if( (float) reference == test )
     //        return 0.0f;
-    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out
-    // before we get here. Otherwise, we'll return inf ulp error here, for what
-    // are otherwise correctly rounded results.
+    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
+    // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+    // results.
     double testVal = test;
-    if (isinf(reference))
-    {
-        if (testVal == reference) return 0.0f;
-        return (float)(testVal - reference);
-    }
-    if (isinf(testVal))
-    {
-        // Allow overflow within the limit of the allowed ulp error. Towards
-        // that end we pretend the test value is actually 2**16, the next value
-        // that would appear in the number line if half had sufficient range.
-        testVal = copysign(65536.0, testVal);
-    }
-    if (u.u & 0x000fffffffffffffULL)
+    if( u.u & 0x000fffffffffffffULL )
     { // Non-power of two and NaN
-        if (isnan(reference) && isnan(test))
-            return 0.0f; // if we are expecting a NaN, any NaN is fine
+        if( isnan( reference ) && isnan( test ) )
+            return 0.0f;    // if we are expecting a NaN, any NaN is fine
         // The unbiased exponent of the ulp unit place
-        int ulp_exp =
-            HALF_MANT_DIG - 1 - MAX(ilogb(reference), HALF_MIN_EXP - 1);
+        int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 );
         // Scale the exponent of the error
-        return (float)scalbn(testVal - reference, ulp_exp);
+        return (float) scalbn( testVal - reference, ulp_exp );
+    }
+    if( isinf( reference ) )
+    {
+        if( (double) test == reference )
+            return 0.0f;
+        return (float) (testVal - reference );
     // reference is a normal power of two or a zero
-    int ulp_exp =
-        HALF_MANT_DIG - 1 - MAX(ilogb(reference) - 1, HALF_MIN_EXP - 1);
+    int ulp_exp =  HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 );
     // Scale the exponent of the error
-    return (float)scalbn(testVal - reference, ulp_exp);
+    return (float) scalbn( testVal - reference, ulp_exp );
-float Ulp_Error_Half(cl_half test, float reference)
+// Taken from vLoadHalf test
+static inline float half2float( cl_ushort us )
-    return Ulp_Error_Half_Float(cl_half_to_float(test), reference);
+    uint32_t u = us;
+    uint32_t sign = (u << 16) & 0x80000000;
+    int32_t exponent = (u & 0x7c00) >> 10;
+    uint32_t mantissa = (u & 0x03ff) << 13;
+    union{ unsigned int u; float f;}uu;
+    if( exponent == 0 )
+    {
+        if( mantissa == 0 )
+            return sign ? -0.0f : 0.0f;
+        int shift = __builtin_clz( mantissa ) - 8;
+        exponent -= shift-1;
+        mantissa <<= shift;
+        mantissa &= 0x007fffff;
+    }
+    else
+        if( exponent == 31)
+        {
+            uu.u = mantissa | sign;
+            if( mantissa )
+                uu.u |= 0x7fc00000;
+            else
+                uu.u |= 0x7f800000;
+            return uu.f;
+        }
+    exponent += 127 - 15;
+    exponent <<= 23;
+    exponent |= mantissa;
+    uu.u = exponent | sign;
+    return uu.f;
+float Ulp_Error_Half( cl_ushort test, float reference )
+    return Ulp_Error_Half_Float( half2float(test), reference );
-float Ulp_Error(float test, double reference)
+float Ulp_Error( float test, double reference )
-    union {
-        double d;
-        uint64_t u;
-    } u;
-    u.d = reference;
+    union{ double d; uint64_t u; }u;     u.d = reference;
     double testVal = test;
-    // Note: This function presumes that someone has already tested whether the
-    // result is correctly, rounded before calling this function.  That test:
+    // Note: This function presumes that someone has already tested whether the result is correctly
+    // rounded before calling this function.  That test:
     //    if( (float) reference == test )
     //        return 0.0f;
-    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out
-    // before we get here. Otherwise, we'll return inf ulp error here, for what
-    // are otherwise correctly rounded results.
+    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
+    // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+    // results.
-    if (isinf(reference))
+    if( isinf( reference ) )
-        if (testVal == reference) return 0.0f;
+        if( testVal == reference )
+            return 0.0f;
-        return (float)(testVal - reference);
+        return (float) (testVal - reference );
-    if (isinf(testVal))
-    { // infinite test value, but finite (but possibly overflowing in float)
-      // reference.
+    if( isinf( testVal) )
+    { // infinite test value, but finite (but possibly overflowing in float) reference.
-      // The function probably overflowed prematurely here. Formally, the spec
-      // says this is an infinite ulp error and should not be tolerated.
-      // Unfortunately, this would mean that the internal precision of some
-      // half_pow implementations would have to be 29+ bits at half_powr(
-      // 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 )
-      // is not exactly 128.0. You might represent this for example as 4*(32 -
-      // ~2**-24), which after rounding to single is 4*32 = 128, which will
-      // ultimately result in premature overflow, even though a good faith
-      // representation would be correct to within 2**-29 interally.
+      // The function probably overflowed prematurely here. Formally, the spec says this is
+      // an infinite ulp error and should not be tolerated. Unfortunately, this would mean
+      // that the internal precision of some half_pow implementations would have to be 29+ bits
+      // at half_powr( 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 )
+      // is not exactly 128.0. You might represent this for example as 4*(32 - ~2**-24), which
+      // after rounding to single is 4*32 = 128, which will ultimately result in premature
+      // overflow, even though a good faith representation would be correct to within 2**-29
+      // internally.
-        // In the interest of not requiring the implementation go to
-        // extraordinary lengths to deliver a half precision function, we allow
-        // premature overflow within the limit of the allowed ulp error.
-        // Towards, that end, we "pretend" the test value is actually 2**128,
-        // the next value that would appear in the number line if float had
-        // sufficient range.
-        testVal = copysign(MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal);
+        // In the interest of not requiring the implementation to go to extraordinary lengths to
+        // deliver a half precision function, we allow premature overflow within the limit
+        // of the allowed ulp error. Toward that end, we "pretend" the test value is actually
+        // 2**128, the next value that would appear in the number line if float had sufficient range.
+        testVal = copysign( MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal );
-        // Note that the same hack may not work in long double, which is not
-        // guaranteed to have more range than double.  It is not clear that
-        // premature overflow should be tolerated for double.
+        // Note that the same hack may not work in long double, which is not guaranteed to have
+        // more range than double.  It is not clear that premature overflow should be tolerated for
+        // double.
-    if (u.u & 0x000fffffffffffffULL)
+    if( u.u & 0x000fffffffffffffULL )
     { // Non-power of two and NaN
-        if (isnan(reference) && isnan(test))
-            return 0.0f; // if we are expecting a NaN, any NaN is fine
+        if( isnan( reference ) && isnan( test ) )
+            return 0.0f;    // if we are expecting a NaN, any NaN is fine
         // The unbiased exponent of the ulp unit place
-        int ulp_exp = FLT_MANT_DIG - 1 - MAX(ilogb(reference), FLT_MIN_EXP - 1);
+        int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference), FLT_MIN_EXP-1 );
         // Scale the exponent of the error
-        return (float)scalbn(testVal - reference, ulp_exp);
+        return (float) scalbn( testVal - reference, ulp_exp );
     // reference is a normal power of two or a zero
     // The unbiased exponent of the ulp unit place
-    int ulp_exp = FLT_MANT_DIG - 1 - MAX(ilogb(reference) - 1, FLT_MIN_EXP - 1);
+    int ulp_exp =  FLT_MANT_DIG - 1 - MAX( ilogb( reference) - 1, FLT_MIN_EXP-1 );
     // Scale the exponent of the error
-    return (float)scalbn(testVal - reference, ulp_exp);
+    return (float) scalbn( testVal - reference, ulp_exp );
-float Ulp_Error_Double(double test, long double reference)
+float Ulp_Error_Double( double test, long double reference )
-    // Deal with long double = double
-    // On most systems long double is a higher precision type than double. They
-    // provide either a 80-bit or greater floating point type, or they provide a
-    // head-tail double double format. That is sufficient to represent the
-    // accuracy of a floating point result to many more bits than double and we
-    // can calculate sub-ulp errors. This is the standard system for which this
-    // test suite is designed.
-    //
-    // On some systems double and long double are the same thing. Then we run
-    // into a problem, because our representation of the infinitely precise
-    // result (passed in as reference above) can be off by as much as a half
-    // double precision ulp itself.  In this case, we inflate the reported error
-    // by half an ulp to take this into account.  A more correct and permanent
-    // fix would be to undertake refactoring the reference code to return
-    // results in this format:
-    //
-    //    typedef struct DoubleReference
-    //    {
-    //        // true value = correctlyRoundedResult + ulps *
-    //        //    ulp(correctlyRoundedResult)  (infinitely precise)
-    //        // as best we can:
-    //        double correctlyRoundedResult;
-    //        // plus a fractional amount to account for the difference
-    //        // between infinitely precise result and correctlyRoundedResult,
-    //        // in units of ulps:
-    //        double ulps;
-    //    } DoubleReference;
-    //
-    // This would provide a useful higher-than-double precision format for
-    // everyone that we can use, and would solve a few problems with
-    // representing absolute errors below DBL_MIN and over DBL_MAX for systems
-    // that use a head to tail double double for long double.
+  // Deal with long double = double
+  // On most systems long double is a higher precision type than double. They provide either
+  // an 80-bit or greater floating point type, or they provide a head-tail double double format.
+  // That is sufficient to represent the accuracy of a floating point result to many more bits
+  // than double and we can calculate sub-ulp errors. This is the standard system for which this
+  // test suite is designed.
+  //
+  // On some systems double and long double are the same thing. Then we run into a problem,
+  // because our representation of the infinitely precise result (passed in as reference above)
+  // can be off by as much as a half double precision ulp itself.  In this case, we inflate the
+  // reported error by half an ulp to take this into account.  A more correct and permanent fix
+  // would be to undertake refactoring the reference code to return results in this format:
+  //
+  //    typedef struct DoubleReference
+  //    { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult)        (infinitely precise)
+  //        double  correctlyRoundedResult;     // as best we can
+  //        double  ulps;                       // plus a fractional amount to account for the difference
+  //    }DoubleReference;                       //     between infinitely precise result and correctlyRoundedResult, in units of ulps.
+  //
+  // This would provide a useful higher-than-double precision format for everyone that we can use,
+  // and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems
+  // that use a head to tail double double for long double.
-    // Note: This function presumes that someone has already tested whether the
-    // result is correctly, rounded before calling this function.  That test:
+    // Note: This function presumes that someone has already tested whether the result is correctly
+    // rounded before calling this function.  That test:
     //    if( (float) reference == test )
     //        return 0.0f;
-    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out
-    // before we get here. Otherwise, we'll return inf ulp error here, for what
-    // are otherwise correctly rounded results.
+    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
+    // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+    // results.
     int x;
     long double testVal = test;
-    if (0.5L != frexpl(reference, &x))
+    if( 0.5L != frexpl( reference, &x) )
     { // Non-power of two and NaN
-        if (isinf(reference))
+        if( isinf( reference ) )
-            if (testVal == reference) return 0.0f;
+            if( testVal == reference )
+                return 0.0f;
-            return (float)(testVal - reference);
+            return (float) ( testVal - reference );
-        if (isnan(reference) && isnan(test))
-            return 0.0f; // if we are expecting a NaN, any NaN is fine
+        if( isnan( reference ) && isnan( test ) )
+            return 0.0f;    // if we are expecting a NaN, any NaN is fine
         // The unbiased exponent of the ulp unit place
-        int ulp_exp =
-            DBL_MANT_DIG - 1 - MAX(ilogbl(reference), DBL_MIN_EXP - 1);
+        int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 );
         // Scale the exponent of the error
-        float result = (float)scalbnl(testVal - reference, ulp_exp);
+        float result = (float) scalbnl( testVal - reference, ulp_exp );
-        // account for rounding error in reference result on systems that do not
-        // have a higher precision floating point type (see above)
-        if (sizeof(long double) == sizeof(double))
-            result += copysignf(0.5f, result);
+        // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
+        if( sizeof(long double) == sizeof( double ) )
+            result += copysignf( 0.5f, result);
         return result;
     // reference is a normal power of two or a zero
     // The unbiased exponent of the ulp unit place
-    int ulp_exp =
-        DBL_MANT_DIG - 1 - MAX(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
+    int ulp_exp =  DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 );
     // Scale the exponent of the error
-    float result = (float)scalbnl(testVal - reference, ulp_exp);
+    float result = (float) scalbnl( testVal - reference, ulp_exp );
-    // account for rounding error in reference result on systems that do not
-    // have a higher precision floating point type (see above)
-    if (sizeof(long double) == sizeof(double))
-        result += copysignf(0.5f, result);
+    // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
+    if( sizeof(long double) == sizeof( double ) )
+        result += copysignf( 0.5f, result);
     return result;
-cl_int OutputBuildLogs(cl_program program, cl_uint num_devices,
-                       cl_device_id *device_list)
+cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list)
-    int error;
-    size_t size_ret;
+  int error;
+  size_t size_ret;
-    // Does the program object exist?
-    if (program != NULL)
-    {
+  // Does the program object exist?
+  if (program != NULL) {
-        // Was the number of devices given
-        if (num_devices == 0)
-        {
+    // Was the number of devices given
+    if (num_devices == 0) {
-            // If zero devices were specified then allocate and query the device
-            // list from the context
-            cl_context context;
-            error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT,
-                                     sizeof(context), &context, NULL);
-            test_error(error, "Unable to query program's context");
-            error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
-                                     &size_ret);
-            test_error(error, "Unable to query context's device size");
-            num_devices = size_ret / sizeof(cl_device_id);
-            device_list = (cl_device_id *)malloc(size_ret);
-            if (device_list == NULL)
-            {
-                print_error(error, "malloc failed");
-                return CL_OUT_OF_HOST_MEMORY;
-            }
-            error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size_ret,
-                                     device_list, NULL);
-            test_error(error, "Unable to query context's devices");
-        }
+      // If zero devices were specified then allocate and query the device list from the context
+      cl_context context;
+      error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT, sizeof(context), &context, NULL);
+      test_error( error, "Unable to query program's context" );
+      error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size_ret);
+      test_error( error, "Unable to query context's device size" );
+      num_devices = size_ret / sizeof(cl_device_id);
+      device_list = (cl_device_id *) malloc(size_ret);
+      if (device_list == NULL) {
+          print_error( error, "malloc failed" );
+          return CL_OUT_OF_HOST_MEMORY;
+      }
+      error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size_ret, device_list, NULL);
+      test_error( error, "Unable to query context's devices" );
-        // For each device in the device_list
-        unsigned int i;
-        for (i = 0; i < num_devices; i++)
-        {
-            // Get the build status
-            cl_build_status build_status;
-            error = clGetProgramBuildInfo(
-                program, device_list[i], CL_PROGRAM_BUILD_STATUS,
-                sizeof(build_status), &build_status, &size_ret);
-            test_error(error, "Unable to query build status");
-            // If the build failed then log the status, and allocate the build
-            // log, log it and free it
-            if (build_status != CL_BUILD_SUCCESS)
-            {
-                log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n",
-                          (int)build_status);
-                error = clGetProgramBuildInfo(program, device_list[i],
-                                              CL_PROGRAM_BUILD_LOG, 0, NULL,
-                                              &size_ret);
-                test_error(error, "Unable to query build log size");
-                char *build_log = (char *)malloc(size_ret);
-                error = clGetProgramBuildInfo(program, device_list[i],
-                                              CL_PROGRAM_BUILD_LOG, size_ret,
-                                              build_log, &size_ret);
-                test_error(error, "Unable to query build log");
-                log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
-                free(build_log);
-            }
-        }
-        // Was the number of devices given
-        if (num_devices == 0)
-        {
-            // If zero devices were specified then free the device list
-            free(device_list);
-        }
-    return CL_SUCCESS;
+    // For each device in the device_list
+    unsigned int i;
+    for (i = 0; i < num_devices; i++) {
+      // Get the build status
+      cl_build_status build_status;
+      error = clGetProgramBuildInfo(program,
+                                    device_list[i],
+                                    CL_PROGRAM_BUILD_STATUS,
+                                    sizeof(build_status),
+                                    &build_status,
+                                    &size_ret);
+      test_error( error, "Unable to query build status" );
+      // If the build failed then log the status, and allocate the build log, log it and free it
+      if (build_status != CL_BUILD_SUCCESS) {
+        log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status);
+        error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret);
+        test_error( error, "Unable to query build log size" );
+        char *build_log = (char *) malloc(size_ret);
+        error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, size_ret, build_log, &size_ret);
+        test_error( error, "Unable to query build log" );
+        log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
+        free(build_log);
+      }
+    }
+    // Was the number of devices given
+    if (num_devices == 0) {
+      // If zero devices were specified then free the device list
+      free(device_list);
+    }
+  }
+  return CL_SUCCESS;
-const char *subtests_to_skip_with_offline_compiler[] = {
-    "get_kernel_arg_info",
-    "binary_create",
+const char * subtests_requiring_opencl_1_2[] = {
+            "device_partition_equally",
+            "device_partition_by_counts",
+            "device_partition_by_affinity_domain_numa",
+            "device_partition_by_affinity_domain_l4_cache",
+            "device_partition_by_affinity_domain_l3_cache",
+            "device_partition_by_affinity_domain_l2_cache",
+            "device_partition_by_affinity_domain_l1_cache",
+            "device_partition_by_affinity_domain_next_partitionable",
+            "device_partition_all",
+    "buffer_fill_int",
+    "buffer_fill_uint",
+    "buffer_fill_short",
+    "buffer_fill_ushort",
+    "buffer_fill_char",
+    "buffer_fill_uchar",
+    "buffer_fill_long",
+    "buffer_fill_ulong",
+    "buffer_fill_float",
+    "buffer_fill_struct",
+  "test_mem_host_write_only_buffer",
+  "test_mem_host_write_only_subbuffer",
+  "test_mem_host_no_access_buffer",
+  "test_mem_host_no_access_subbuffer",
+  "test_mem_host_read_only_image",
+  "test_mem_host_write_only_image",
+  "test_mem_host_no_access_image",
+    "get_buffer_info",
+    "get_image1d_info",
+    "get_image1d_array_info",
+    "get_image2d_array_info",
+  // gl/
+  "images_read_1D",
+  "images_write_1D",
+  "images_1D_getinfo",
+  "images_read_1Darray",
+  "images_write_1Darray",
+  "images_1Darray_getinfo",
+  "images_read_2Darray",
+  "images_write_2Darray",
+  "images_2Darray_getinfo",
+    "buffer_migrate",
+    "image_migrate",
+   // compiler/
@@ -633,6 +651,9 @@
+    "get_program_info",
+    "large_compile",
+    "async_build",
@@ -644,16 +665,32 @@
+    "simple_compile_only",
+    "simple_static_compile_only",
+    "simple_compile_with_callback",
+    "simple_link_only",
+    "simple_link_with_callback",
+    "execute_after_simple_compile_and_link",
+    "execute_after_simple_compile_and_link_no_device_info",
+    "execute_after_simple_library_with_link",
+    "execute_after_two_file_link",
+    "execute_after_two_file_link",
+    "execute_after_serialize_reload_object",
+    "execute_after_serialize_reload_library",
+    "simple_library_only",
+    "simple_library_with_callback",
+    "simple_library_with_link",
+    "two_file_link",
@@ -661,47 +698,66 @@
-    "kernel_preprocessor_macros",
-    "execute_after_serialize_reload_library",
-    "execute_after_serialize_reload_object",
-    "execute_after_simple_compile_and_link",
-    "execute_after_simple_compile_and_link_no_device_info",
-    "execute_after_simple_library_with_link",
-    "execute_after_two_file_link",
-    "simple_compile_only",
-    "simple_compile_with_callback",
-    "simple_library_only",
-    "simple_library_with_callback",
-    "simple_library_with_link",
-    "simple_link_only",
-    "simple_link_with_callback",
-    "simple_static_compile_only",
-    "two_file_link",
-    "async_build",
-    "unload_repeated",
-    "unload_compile_unload_link",
-    "unload_build_unload_create_kernel",
-    "unload_link_different",
-    "unload_build_threaded",
-    "unload_build_info",
-    "unload_program_binaries",
-    "features_macro",
-    "progvar_prog_scope_misc",
-    "library_function"
+    // CL_PROGRAM_NUM_KERNELS, in api/
+    "get_kernel_arg_info",
+    "create_kernels_in_program",
+    // clEnqueue..WithWaitList, in events/
+    "event_enqueue_marker_with_event_list",
+    "event_enqueue_barrier_with_event_list",
+    "popcount"
-int check_functions_for_offline_compiler(const char *subtestname,
-                                         cl_device_id device)
+const char * subtests_to_skip_with_offline_compiler[] = {
+            "get_kernel_arg_info",
+            "binary_create",
+            "load_program_source",
+            "load_multistring_source",
+            "load_two_kernel_source",
+            "load_null_terminated_source",
+            "load_null_terminated_multi_line_source",
+            "load_null_terminated_partial_multi_line_source",
+            "load_discreet_length_source",
+            "get_program_source",
+            "get_program_build_info",
+            "options_build_optimizations",
+            "options_build_macro",
+            "options_build_macro_existence",
+            "options_include_directory",
+            "options_denorm_cache",
+            "preprocessor_define_udef",
+            "preprocessor_include",
+            "preprocessor_line_error",
+            "preprocessor_pragma",
+            "compiler_defines_for_extensions",
+            "image_macro",
+            "simple_extern_compile_only",
+            "simple_embedded_header_compile",
+            "two_file_regular_variable_access",
+            "two_file_regular_struct_access",
+            "two_file_regular_function_access",
+            "simple_embedded_header_link",
+            "execute_after_simple_compile_and_link_with_defines",
+            "execute_after_simple_compile_and_link_with_callbacks",
+            "execute_after_embedded_header_link",
+            "execute_after_included_header_link",
+            "multi_file_libraries",
+            "multiple_files",
+            "multiple_libraries",
+            "multiple_files_multiple_libraries",
+            "multiple_embedded_headers",
+            "program_binary_type",
+            "compile_and_link_status_options_log",
+            "kernel_preprocessor_macros",
+int check_functions_for_offline_compiler(const char *subtestname, cl_device_id device)
     if (gCompilationMode != kOnline)
-        int nNotRequiredWithOfflineCompiler =
-            sizeof(subtests_to_skip_with_offline_compiler) / sizeof(char *);
+        int nNotRequiredWithOfflineCompiler = sizeof(subtests_to_skip_with_offline_compiler)/sizeof(char *);
         size_t i;
-        for (i = 0; i < nNotRequiredWithOfflineCompiler; ++i)
-        {
-            if (!strcmp(subtestname, subtests_to_skip_with_offline_compiler[i]))
-            {
+        for(i=0; i < nNotRequiredWithOfflineCompiler; ++i) {
+            if(!strcmp(subtestname, subtests_to_skip_with_offline_compiler[i])) {
                 return 1;
diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h
index 1944601..727c213 100644
--- a/test_common/harness/errorHelpers.h
+++ b/test_common/harness/errorHelpers.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -24,179 +24,91 @@
 #include <CL/opencl.h>
 #include <stdlib.h>
-#define LOWER_IS_BETTER 0
+#define LOWER_IS_BETTER     0
+#define HIGHER_IS_BETTER    1
 #include <stdio.h>
 #define test_start()
 #define log_info printf
 #define log_error printf
 #define log_missing_feature printf
-#define log_perf(_number, _higherBetter, _numType, _format, ...)               \
-    printf("Performance Number " _format " (in %s, %s): %g\n", ##__VA_ARGS__,  \
-           _numType, _higherBetter ? "higher is better" : "lower is better",   \
-           _number)
-#define vlog_perf(_number, _higherBetter, _numType, _format, ...)              \
-    printf("Performance Number " _format " (in %s, %s): %g\n", ##__VA_ARGS__,  \
-           _numType, _higherBetter ? "higher is better" : "lower is better",   \
-           _number)
+#define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType,        \
+                    _higherBetter?"higher is better":"lower is better", _number )
+#define vlog_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType,    \
+                    _higherBetter?"higher is better":"lower is better" , _number)
 #ifdef _WIN32
-#ifdef __MINGW32__
-// Use __mingw_printf since it supports "%a" format specifier
-#define vlog __mingw_printf
-#define vlog_error __mingw_printf
+    #ifdef __MINGW32__
+        // Use __mingw_printf since it supports "%a" format specifier
+        #define vlog __mingw_printf
+        #define vlog_error __mingw_printf
+    #else
+        // Use home-baked function that treats "%a" as "%f"
+    static int vlog_win32(const char *format, ...);
+    #define vlog vlog_win32
+    #define vlog_error vlog_win32
+    #endif
-// Use home-baked function that treats "%a" as "%f"
-static int vlog_win32(const char *format, ...);
-#define vlog vlog_win32
-#define vlog_error vlog_win32
-#define vlog_error printf
-#define vlog printf
+    #define vlog_error printf
+    #define vlog printf
-#define ct_assert(b) ct_assert_i(b, __LINE__)
-#define ct_assert_i(b, line) ct_assert_ii(b, line)
-#define ct_assert_ii(b, line)                                                  \
-    int _compile_time_assertion_on_line_##line[b ? 1 : -1];
+#define ct_assert(b)          ct_assert_i(b, __LINE__)
+#define ct_assert_i(b, line)  ct_assert_ii(b, line)
+#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1];
-#define test_fail(msg, ...)                                                    \
-    {                                                                          \
-        log_error(msg, ##__VA_ARGS__);                                         \
-        return TEST_FAIL;                                                      \
-    }
-#define test_error(errCode, msg) test_error_ret(errCode, msg, errCode)
-#define test_error_ret(errCode, msg, retValue)                                 \
-    {                                                                          \
-        auto errCodeResult = errCode;                                          \
-        if (errCodeResult != CL_SUCCESS)                                       \
-        {                                                                      \
-            print_error(errCodeResult, msg);                                   \
-            return retValue;                                                   \
-        }                                                                      \
-    }
-#define print_error(errCode, msg)                                              \
-    log_error("ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString(errCode),   \
-              __FILE__, __LINE__);
+#define test_error(errCode,msg)    test_error_ret(errCode,msg,errCode)
+#define test_error_ret(errCode,msg,retValue)    { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } }
+#define print_error(errCode,msg)    log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ );
-#define test_missing_feature(errCode, msg)                                     \
-    test_missing_feature_ret(errCode, msg, errCode)
-// this macro should always return CL_SUCCESS, but print the missing feature
-// message
-#define test_missing_feature_ret(errCode, msg, retValue)                       \
-    {                                                                          \
-        if (errCode != CL_SUCCESS)                                             \
-        {                                                                      \
-            print_missing_feature(errCode, msg);                               \
-            return CL_SUCCESS;                                                 \
-        }                                                                      \
-    }
-#define print_missing_feature(errCode, msg)                                    \
-    log_missing_feature("ERROR: Subtest %s tests a feature not supported by "  \
-                        "the device version! (from %s:%d)\n",                  \
-                        msg, __FILE__, __LINE__);
+#define test_missing_feature(errCode, msg) test_missing_feature_ret(errCode, msg, errCode)
+// this macro should always return CL_SUCCESS, but print the missing feature message
+#define test_missing_feature_ret(errCode,msg,retValue)    { if( errCode != CL_SUCCESS ) { print_missing_feature( errCode, msg ); return CL_SUCCESS ; } }
+#define print_missing_feature(errCode, msg) log_missing_feature("ERROR: Subtest %s tests a feature not supported by the device version! (from %s:%d)\n", msg, __FILE__, __LINE__ );
-#define test_missing_support_offline_cmpiler(errCode, msg)                     \
-    test_missing_support_offline_cmpiler_ret(errCode, msg, errCode)
-// this macro should always return CL_SUCCESS, but print the skip message on
-// test not supported with offline compiler
-#define test_missing_support_offline_cmpiler_ret(errCode, msg, retValue)       \
-    {                                                                          \
-        if (errCode != CL_SUCCESS)                                             \
-        {                                                                      \
-            log_info("INFO: Subtest %s tests is not supported in offline "     \
-                     "compiler execution path! (from %s:%d)\n",                \
-                     msg, __FILE__, __LINE__);                                 \
-            return TEST_SKIP;                                                  \
-        }                                                                      \
-    }
+#define test_missing_support_offline_cmpiler(errCode, msg) test_missing_support_offline_cmpiler_ret(errCode, msg, errCode)
+// this macro should always return TEST_SKIP, and print the skip message when a test is not supported with the offline compiler
+#define test_missing_support_offline_cmpiler_ret(errCode,msg,retValue)    { if( errCode != CL_SUCCESS ) { log_info( "INFO: Subtest %s tests is not supported in offline compiler execution path! (from %s:%d)\n", msg, __FILE__, __LINE__ ); return TEST_SKIP ; } }
 // expected error code vs. what we got
-#define test_failure_error(errCode, expectedErrCode, msg)                      \
-    test_failure_error_ret(errCode, expectedErrCode, msg,                      \
-                           errCode != expectedErrCode)
-#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue)        \
-    {                                                                          \
-        if (errCode != expectedErrCode)                                        \
-        {                                                                      \
-            print_failure_error(errCode, expectedErrCode, msg);                \
-            return retValue;                                                   \
-        }                                                                      \
-    }
-#define print_failure_error(errCode, expectedErrCode, msg)                     \
-    log_error("ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg,            \
-              IGetErrorString(errCode), IGetErrorString(expectedErrCode),      \
-              __FILE__, __LINE__);
-#define test_failure_warning(errCode, expectedErrCode, msg)                    \
-    test_failure_warning_ret(errCode, expectedErrCode, msg,                    \
-                             errCode != expectedErrCode)
-#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue)      \
-    {                                                                          \
-        if (errCode != expectedErrCode)                                        \
-        {                                                                      \
-            print_failure_warning(errCode, expectedErrCode, msg);              \
-            warnings++;                                                        \
-        }                                                                      \
-    }
-#define print_failure_warning(errCode, expectedErrCode, msg)                   \
-    log_error("WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg,          \
-              IGetErrorString(errCode), IGetErrorString(expectedErrCode),      \
-              __FILE__, __LINE__);
+#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
+#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_error( errCode, expectedErrCode, msg ); return retValue ; } }
+#define print_failure_error(errCode, expectedErrCode, msg) log_error( "ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
+#define test_failure_warning(errCode, expectedErrCode, msg) test_failure_warning_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
+#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } }
+#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
-// generate an error when an assertion is false (not error code related)
-#define test_assert_error(condition, msg)                                      \
-    test_assert_error_ret(condition, msg, TEST_FAIL)
-#define test_assert_error_ret(condition, msg, retValue)                        \
-    {                                                                          \
-        if (!(condition))                                                      \
-        {                                                                      \
-            print_assertion_error(condition, msg);                             \
-            return retValue;                                                   \
-        }                                                                      \
-    }
-#define print_assertion_error(condition, msg)                                  \
-    log_error("ERROR: %s! (!(%s) from %s:%d)\n", msg, #condition, __FILE__,    \
-              __LINE__);
-#define ASSERT_SUCCESS(expr, msg)                                              \
-    do                                                                         \
-    {                                                                          \
-        cl_int _temp_retval = (expr);                                          \
-        if (_temp_retval != CL_SUCCESS)                                        \
-        {                                                                      \
-            std::stringstream ss;                                              \
-            ss << "ERROR: " << msg << "=" << IGetErrorString(_temp_retval)     \
-               << " at " << __FILE__ << ":" << __LINE__ << "\n";               \
-            throw std::runtime_error(ss.str());                                \
-        }                                                                      \
+#define ASSERT_SUCCESS(expr, msg)                                                                  \
+    do                                                                                             \
+    {                                                                                              \
+        cl_int _temp_retval = (expr);                                                              \
+        if (_temp_retval != CL_SUCCESS)                                                            \
+        {                                                                                          \
+            std::stringstream ss;                                                                  \
+            ss << "ERROR: " << msg << "=" << IGetErrorString(_temp_retval)                         \
+               << " at " << __FILE__ << ":" << __LINE__ << "\n";                                   \
+            throw std::runtime_error(ss.str());                                                    \
+        }                                                                                          \
     } while (0)
-extern const char *IGetErrorString(int clErrorCode);
+extern const char    *IGetErrorString( int clErrorCode );
-extern float Ulp_Error_Half(cl_half test, float reference);
-extern float Ulp_Error(float test, double reference);
-extern float Ulp_Error_Double(double test, long double reference);
+extern float Ulp_Error_Half( cl_ushort test, float reference );
+extern float Ulp_Error( float test, double reference );
+extern float Ulp_Error_Double( double test, long double reference );
-extern const char *GetChannelTypeName(cl_channel_type type);
-extern int IsChannelTypeSupported(cl_channel_type type);
-extern const char *GetChannelOrderName(cl_channel_order order);
-extern int IsChannelOrderSupported(cl_channel_order order);
-extern const char *GetAddressModeName(cl_addressing_mode mode);
-extern const char *GetQueuePropertyName(cl_command_queue_properties properties);
+extern const char *GetChannelTypeName( cl_channel_type type );
+extern int IsChannelTypeSupported( cl_channel_type type );
+extern const char *GetChannelOrderName( cl_channel_order order );
+extern int IsChannelOrderSupported( cl_channel_order order );
+extern const char *GetAddressModeName( cl_addressing_mode mode );
-extern const char *GetDeviceTypeName(cl_device_type type);
-int check_functions_for_offline_compiler(const char *subtestname,
-                                         cl_device_id device);
-cl_int OutputBuildLogs(cl_program program, cl_uint num_devices,
-                       cl_device_id *device_list);
+extern const char *GetDeviceTypeName( cl_device_type type );
+int check_functions_for_offline_compiler(const char *subtestname, cl_device_id device);
-// storage, but it's not reentrant then!)
-extern const char *GetDataVectorString(void *dataBuffer, size_t typeSize,
-                                       size_t vecSize, char *buffer);
+// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static storage, but it's not reentrant then!)
+extern const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer );
-#if defined(_WIN32) && !defined(__MINGW32__)
+#if defined (_WIN32) && !defined(__MINGW32__)
 #include <stdarg.h>
 #include <stdio.h>
 #include <string.h>
@@ -204,21 +116,17 @@
     const char *new_format = format;
-    if (strstr(format, "%a"))
-    {
+    if (strstr(format, "%a")) {
         char *temp;
-        if ((temp = strdup(format)) == NULL)
-        {
+        if ((temp = strdup(format)) == NULL) {
             printf("vlog_win32: Failed to allocate memory for strdup\n");
             return -1;
         new_format = temp;
-        while (*temp)
-        {
+        while (*temp) {
             // replace %a with %f
-            if ((*temp == '%') && (*(temp + 1) == 'a'))
-            {
-                *(temp + 1) = 'f';
+            if ((*temp == '%') && (*(temp+1) == 'a')) {
+                *(temp+1) = 'f';
@@ -229,9 +137,8 @@
     vprintf(new_format, args);
-    if (new_format != format)
-    {
-        free((void *)new_format);
+    if (new_format != format) {
+        free((void*)new_format);
     return 0;
@@ -240,3 +147,5 @@
 #endif // _errorHelpers_h
diff --git a/test_common/harness/featureHelpers.cpp b/test_common/harness/featureHelpers.cpp
deleted file mode 100644
index 07435c3..0000000
--- a/test_common/harness/featureHelpers.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "featureHelpers.h"
-#include "errorHelpers.h"
-#include <assert.h>
-#include <string.h>
-#include <vector>
-int get_device_cl_c_features(cl_device_id device, OpenCLCFeatures& features)
-    // Initially, all features are unsupported.
-    features = { 0 };
-    // The CL_DEVICE_OPENCL_C_FEATURES query does not exist pre-3.0.
-    const Version version = get_device_cl_version(device);
-    if (version < Version(3, 0))
-    {
-        return TEST_PASS;
-    }
-    cl_int error = CL_SUCCESS;
-    size_t sz = 0;
-    error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_FEATURES, 0, NULL, &sz);
-    test_error(error, "Unable to query CL_DEVICE_OPENCL_C_FEATURES size");
-    std::vector<cl_name_version> clc_features(sz / sizeof(cl_name_version));
-    error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_FEATURES, sz,
-                  , NULL);
-    test_error(error, "Unable to query CL_DEVICE_OPENCL_C_FEATURES");
-#define CHECK_OPENCL_C_FEATURE(_feature)                                       \
-    if (strcmp(, #_feature) == 0)                              \
-    {                                                                          \
-        features.supports##_feature = true;                                    \
-    }
-    for (const auto& clc_feature : clc_features)
-    {
-        CHECK_OPENCL_C_FEATURE(__opencl_c_3d_image_writes);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_atomic_order_acq_rel);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_atomic_order_seq_cst);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_atomic_scope_device);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_atomic_scope_all_devices);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_device_enqueue);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_generic_address_space);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_fp64);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_images);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_int64);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_pipes);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_program_scope_global_variables);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_read_write_images);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_subgroups);
-        CHECK_OPENCL_C_FEATURE(__opencl_c_work_group_collective_functions);
-    }
-    return TEST_PASS;
diff --git a/test_common/harness/featureHelpers.h b/test_common/harness/featureHelpers.h
deleted file mode 100644
index 3f77b76..0000000
--- a/test_common/harness/featureHelpers.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef _featureHelpers_h
-#define _featureHelpers_h
-#include "compat.h"
-#include "testHarness.h"
-struct OpenCLCFeatures
-    bool supports__opencl_c_3d_image_writes;
-    bool supports__opencl_c_atomic_order_acq_rel;
-    bool supports__opencl_c_atomic_order_seq_cst;
-    bool supports__opencl_c_atomic_scope_device;
-    bool supports__opencl_c_atomic_scope_all_devices;
-    bool supports__opencl_c_device_enqueue;
-    bool supports__opencl_c_generic_address_space;
-    bool supports__opencl_c_fp64;
-    bool supports__opencl_c_images;
-    bool supports__opencl_c_int64;
-    bool supports__opencl_c_pipes;
-    bool supports__opencl_c_program_scope_global_variables;
-    bool supports__opencl_c_read_write_images;
-    bool supports__opencl_c_subgroups;
-    bool supports__opencl_c_work_group_collective_functions;
-int get_device_cl_c_features(cl_device_id device, OpenCLCFeatures& features);
-#endif // _featureHelpers_h
diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h
index 40826c5..4835db4 100644
--- a/test_common/harness/fpcontrol.h
+++ b/test_common/harness/fpcontrol.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,99 +16,89 @@
 #ifndef _fpcontrol_h
 #define _fpcontrol_h
-// In order to get tests for correctly rounded operations (e.g. multiply) to
-// work properly we need to be able to set the reference hardware to FTZ mode if
-// the device hardware is running in that mode.  We have explored all other
-// options short of writing correctly rounded operations in integer code, and
-// have found this is the only way to correctly verify operation.
+// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
+// to FTZ mode if the device hardware is running in that mode.  We have explored all other options short of writing correctly rounded operations
+// in integer code, and have found this is the only way to correctly verify operation.
-// Non-Apple implementations will need to provide their own implentation for
-// these features.  If the reference hardware and device are both running in the
-// same state (either FTZ or IEEE compliant modes) then these functions may be
-// empty.  If the device is running in non-default rounding mode (e.g. round
-// toward zero), then these functions should also set the reference device into
-// that rounding mode.
-#if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__)              \
-    || defined(__MINGW32__)
-typedef int FPU_mode_type;
-#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
-    || defined(__MINGW32__)
-#include <xmmintrin.h>
-#elif defined(__PPC__)
-#include <fpu_control.h>
-extern __thread fpu_control_t fpu_control;
+// Non-Apple implementations will need to provide their own implementation for these features.  If the reference hardware and device are both
+// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty.  If the device is running in non-default
+// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
+#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
+    typedef int     FPU_mode_type;
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ )
+    #include <xmmintrin.h>
+#elif defined( __PPC__ )
+    #include <fpu_control.h>
+    extern __thread fpu_control_t fpu_control;
-// Set the reference hardware floating point unit to FTZ mode
-static inline void ForceFTZ(FPU_mode_type *mode)
-#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
-    || defined(__MINGW32__)
-    *mode = _mm_getcsr();
-    _mm_setcsr(*mode | 0x8040);
-#elif defined(__PPC__)
-    *mode = fpu_control;
-    fpu_control |= _FPU_MASK_NI;
-#elif defined(__arm__)
-    unsigned fpscr;
-    __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
-    *mode = fpscr;
-    __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
-    // Add 64 bit support
-#elif defined(__aarch64__)
-    unsigned fpscr;
-    __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
-    *mode = fpscr;
-    __asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
+    // Set the reference hardware floating point unit to FTZ mode
+    static inline void ForceFTZ( FPU_mode_type *mode )
+    {
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
+        *mode = _mm_getcsr();
+        _mm_setcsr( *mode | 0x8040);
+#elif defined( __PPC__ )
+        *mode = fpu_control;
+        fpu_control |= _FPU_MASK_NI;
+#elif defined ( __arm__ )
+        unsigned fpscr;
+        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
+        // Add 64 bit support
+#elif defined (__aarch64__)
+        unsigned fpscr;
+        __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr | (1U << 24)));
-#error ForceFTZ needs an implentation
+        #error ForceFTZ needs an implentation
+    }
-// Disable the denorm flush to zero
-static inline void DisableFTZ(FPU_mode_type *mode)
-#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
-    || defined(__MINGW32__)
-    *mode = _mm_getcsr();
-    _mm_setcsr(*mode & ~0x8040);
-#elif defined(__PPC__)
-    *mode = fpu_control;
-    fpu_control &= ~_FPU_MASK_NI;
-#elif defined(__arm__)
-    unsigned fpscr;
-    __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
-    *mode = fpscr;
-    __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
-    // Add 64 bit support
-#elif defined(__aarch64__)
-    unsigned fpscr;
-    __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
-    *mode = fpscr;
-    __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
+    // Disable the denorm flush to zero
+    static inline void DisableFTZ( FPU_mode_type *mode )
+    {
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
+        *mode = _mm_getcsr();
+        _mm_setcsr( *mode & ~0x8040);
+#elif defined( __PPC__ )
+        *mode = fpu_control;
+        fpu_control &= ~_FPU_MASK_NI;
+#elif defined ( __arm__ )
+        unsigned fpscr;
+        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
+        // Add 64 bit support
+#elif defined (__aarch64__)
+        unsigned fpscr;
+        __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr & ~(1U << 24)));
-#error DisableFTZ needs an implentation
+    #error DisableFTZ needs an implentation
+    }
-// Restore the reference hardware to floating point state indicated by *mode
-static inline void RestoreFPState(FPU_mode_type *mode)
-#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
-    || defined(__MINGW32__)
-    _mm_setcsr(*mode);
-#elif defined(__PPC__)
-    fpu_control = *mode;
-#elif defined(__arm__)
-    __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
-    // Add 64 bit support
-#elif defined(__aarch64__)
-    __asm__ volatile("msr fpcr, %0" ::"r"(*mode));
+    // Restore the reference hardware to floating point state indicated by *mode
+    static inline void RestoreFPState( FPU_mode_type *mode )
+    {
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
+        _mm_setcsr( *mode );
+#elif defined( __PPC__)
+        fpu_control = *mode;
+#elif defined (__arm__)
+        __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
+        // Add 64 bit support
+#elif defined (__aarch64__)
+        __asm__ volatile ("msr fpcr, %0" :: "r"(*mode));
-#error RestoreFPState needs an implementation
+        #error RestoreFPState needs an implementation
+    }
-#error ForceFTZ and RestoreFPState need implentations
+        #error ForceFTZ and RestoreFPState need implentations
diff --git a/test_common/harness/genericThread.cpp b/test_common/harness/genericThread.cpp
index f50ee6e..2b742fa 100644
--- a/test_common/harness/genericThread.cpp
+++ b/test_common/harness/genericThread.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -21,34 +21,33 @@
 #include <pthread.h>
-void *genericThread::IStaticReflector(void *data)
+void * genericThread::IStaticReflector( void * data )
     genericThread *t = (genericThread *)data;
     return t->IRun();
-bool genericThread::Start(void)
+bool genericThread::Start( void )
 #if defined(_WIN32)
-    mHandle = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)IStaticReflector,
-                           this, 0, NULL);
-    return (mHandle != NULL);
+    mHandle = CreateThread( NULL, 0, (LPTHREAD_START_ROUTINE) IStaticReflector, this, 0, NULL );
+    return ( mHandle != NULL );
 #else // !_WIN32
-    int error = pthread_create((pthread_t *)&mHandle, NULL, IStaticReflector,
-                               (void *)this);
-    return (error == 0);
+    int error = pthread_create( (pthread_t*)&mHandle, NULL, IStaticReflector, (void *)this );
+    return ( error == 0 );
 #endif // !_WIN32
-void *genericThread::Join(void)
+void * genericThread::Join( void )
 #if defined(_WIN32)
-    WaitForSingleObject((HANDLE)mHandle, INFINITE);
+    WaitForSingleObject( (HANDLE)mHandle, INFINITE );
     return NULL;
 #else // !_WIN32
-    void *retVal;
-    int error = pthread_join((pthread_t)mHandle, &retVal);
-    if (error != 0) retVal = NULL;
+    void * retVal;
+    int error = pthread_join( (pthread_t)mHandle, &retVal );
+    if( error != 0 )
+        retVal = NULL;
     return retVal;
 #endif // !_WIN32
diff --git a/test_common/harness/genericThread.h b/test_common/harness/genericThread.h
index cc7c010..168b740 100644
--- a/test_common/harness/genericThread.h
+++ b/test_common/harness/genericThread.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -18,20 +18,25 @@
 #include <stdio.h>
-class genericThread {
-    virtual ~genericThread() {}
+class genericThread
+    public:
-    bool Start(void);
-    void* Join(void);
+        virtual ~genericThread() {}
-    virtual void* IRun(void) = 0;
+        bool    Start( void );
+        void *    Join( void );
-    void* mHandle;
+    protected:
-    static void* IStaticReflector(void* data);
+        virtual void *    IRun( void ) = 0;
+    private:
+        void* mHandle;
+        static void * IStaticReflector( void * data );
 #endif // _genericThread_h
diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp
index 72a2f0c..f36c153 100644
--- a/test_common/harness/imageHelpers.cpp
+++ b/test_common/harness/imageHelpers.cpp
@@ -1,6 +1,6 @@
-// Copyright (c) 2017,2021 The Khronos Group Inc.
+// Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,30 +16,36 @@
 #include "imageHelpers.h"
 #include <limits.h>
 #include <assert.h>
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
 #include <sys/mman.h>
-#if !defined(_WIN32) && !defined(__APPLE__)
+#if !defined (_WIN32) && !defined(__APPLE__)
 #include <malloc.h>
 #include <algorithm>
 #include <iterator>
-#if !defined(_WIN32)
+#if !defined (_WIN32)
 #include <cmath>
 RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode;
+static cl_ushort float2half_rte( float f );
+static cl_ushort float2half_rtz( float f );
 cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
 bool gTestRounding = false;
-double sRGBmap(float fc)
+sRGBmap(float fc)
     double c = (double)fc;
-#if !defined(_WIN32)
-    if (std::isnan(c)) c = 0.0;
+#if !defined (_WIN32)
+    if (std::isnan(c))
+        c = 0.0;
-    if (_isnan(c)) c = 0.0;
+    if (_isnan(c))
+        c = 0.0;
     if (c > 1.0)
@@ -49,12 +55,13 @@
     else if (c < 0.0031308)
         c = 12.92 * c;
-        c = (1055.0 / 1000.0) * pow(c, 5.0 / 12.0) - (55.0 / 1000.0);
+        c = (1055.0/1000.0) * pow(c, 5.0/12.0) - (55.0/1000.0);
     return c * 255.0;
-double sRGBunmap(float fc)
+sRGBunmap(float fc)
     double c = (double)fc;
     double result;
@@ -68,19 +75,20 @@
-uint32_t get_format_type_size(const cl_image_format *format)
+size_t get_format_type_size( const cl_image_format *format )
-    return get_channel_data_type_size(format->image_channel_data_type);
+    return get_channel_data_type_size( format->image_channel_data_type );
-uint32_t get_channel_data_type_size(cl_channel_type channelType)
+size_t get_channel_data_type_size( cl_channel_type channelType )
-    switch (channelType)
+    switch( channelType )
         case CL_SNORM_INT8:
         case CL_UNORM_INT8:
         case CL_SIGNED_INT8:
-        case CL_UNSIGNED_INT8: return 1;
+        case CL_UNSIGNED_INT8:
+            return 1;
         case CL_SNORM_INT16:
         case CL_UNORM_INT16:
@@ -90,10 +98,11 @@
         case CL_SFIXED14_APPLE:
-            return sizeof(cl_short);
+            return sizeof( cl_short );
         case CL_SIGNED_INT32:
-        case CL_UNSIGNED_INT32: return sizeof(cl_int);
+        case CL_UNSIGNED_INT32:
+            return sizeof( cl_int );
         case CL_UNORM_SHORT_565:
         case CL_UNORM_SHORT_555:
@@ -105,7 +114,8 @@
         case CL_UNORM_INT_8888:
-        case CL_UNORM_INT_8888_REV: return 4;
+        case CL_UNORM_INT_8888_REV:
+            return 4;
         case CL_UNORM_INT_101010:
@@ -114,20 +124,22 @@
             return 4;
-        case CL_FLOAT: return sizeof(cl_float);
+        case CL_FLOAT:
+            return sizeof( cl_float );
-        default: return 0;
+        default:
+            return 0;
-uint32_t get_format_channel_count(const cl_image_format *format)
+size_t get_format_channel_count( const cl_image_format *format )
-    return get_channel_order_channel_count(format->image_channel_order);
+    return get_channel_order_channel_count( format->image_channel_order );
-uint32_t get_channel_order_channel_count(cl_channel_order order)
+size_t get_channel_order_channel_count( cl_channel_order order )
-    switch (order)
+    switch( order )
         case CL_R:
         case CL_A:
@@ -135,16 +147,19 @@
         case CL_INTENSITY:
         case CL_LUMINANCE:
         case CL_DEPTH:
-        case CL_DEPTH_STENCIL: return 1;
+        case CL_DEPTH_STENCIL:
+            return 1;
         case CL_RG:
         case CL_RA:
-        case CL_RGx: return 2;
+        case CL_RGx:
+            return 2;
         case CL_RGB:
         case CL_RGBx:
         case CL_sRGB:
-        case CL_sRGBx: return 3;
+        case CL_sRGBx:
+            return 3;
         case CL_RGBA:
         case CL_ARGB:
@@ -161,56 +176,56 @@
         case CL_ABGR_APPLE:
-            return 4;
+          return 4;
-            log_error("%s does not support 0x%x\n", __FUNCTION__, order);
-            return 0;
+          log_error("%s does not support 0x%x\n",__FUNCTION__,order);
+          return 0;
-cl_channel_type get_channel_type_from_name(const char *name)
+cl_channel_type  get_channel_type_from_name( const char *name )
-    struct
-    {
+    struct {
         cl_channel_type type;
         const char *name;
-    } typeNames[] = { { CL_SNORM_INT8, "CL_SNORM_INT8" },
-                      { CL_SNORM_INT16, "CL_SNORM_INT16" },
-                      { CL_UNORM_INT8, "CL_UNORM_INT8" },
-                      { CL_UNORM_INT16, "CL_UNORM_INT16" },
-                      { CL_UNORM_INT24, "CL_UNORM_INT24" },
-                      { CL_UNORM_SHORT_565, "CL_UNORM_SHORT_565" },
-                      { CL_UNORM_SHORT_555, "CL_UNORM_SHORT_555" },
-                      { CL_UNORM_INT_101010, "CL_UNORM_INT_101010" },
-                      { CL_SIGNED_INT8, "CL_SIGNED_INT8" },
-                      { CL_SIGNED_INT16, "CL_SIGNED_INT16" },
-                      { CL_SIGNED_INT32, "CL_SIGNED_INT32" },
-                      { CL_UNSIGNED_INT8, "CL_UNSIGNED_INT8" },
-                      { CL_UNSIGNED_INT16, "CL_UNSIGNED_INT16" },
-                      { CL_UNSIGNED_INT32, "CL_UNSIGNED_INT32" },
-                      { CL_HALF_FLOAT, "CL_HALF_FLOAT" },
-                      { CL_FLOAT, "CL_FLOAT" },
+    } typeNames[] = {
+        { CL_SNORM_INT8, "CL_SNORM_INT8" },
+        { CL_SNORM_INT16, "CL_SNORM_INT16" },
+        { CL_UNORM_INT8, "CL_UNORM_INT8" },
+        { CL_UNORM_INT16, "CL_UNORM_INT16" },
+        { CL_UNORM_INT24, "CL_UNORM_INT24" },
+        { CL_UNORM_SHORT_565, "CL_UNORM_SHORT_565" },
+        { CL_UNORM_SHORT_555, "CL_UNORM_SHORT_555" },
+        { CL_UNORM_INT_101010, "CL_UNORM_INT_101010" },
+        { CL_SIGNED_INT8, "CL_SIGNED_INT8" },
+        { CL_SIGNED_INT16, "CL_SIGNED_INT16" },
+        { CL_SIGNED_INT32, "CL_SIGNED_INT32" },
+        { CL_UNSIGNED_INT16, "CL_UNSIGNED_INT16" },
+        { CL_UNSIGNED_INT32, "CL_UNSIGNED_INT32" },
+        { CL_HALF_FLOAT, "CL_HALF_FLOAT" },
+        { CL_FLOAT, "CL_FLOAT" },
-                      { CL_SFIXED14_APPLE, "CL_SFIXED14_APPLE" }
-    for (size_t i = 0; i < sizeof(typeNames) / sizeof(typeNames[0]); i++)
+    for( size_t i = 0; i < sizeof( typeNames ) / sizeof( typeNames[ 0 ] ); i++ )
-        if (strcmp(typeNames[i].name, name) == 0
-            || strcmp(typeNames[i].name + 3, name) == 0)
-            return typeNames[i].type;
+        if( strcmp( typeNames[ i ].name, name ) == 0 || strcmp( typeNames[ i ].name + 3, name ) == 0 )
+            return typeNames[ i ].type;
     return (cl_channel_type)-1;
-cl_channel_order get_channel_order_from_name(const char *name)
+cl_channel_order  get_channel_order_from_name( const char *name )
     const struct
-        cl_channel_order order;
-        const char *name;
-    } orderNames[] = {
+        cl_channel_order    order;
+        const char          *name;
+    }orderNames[] =
+    {
         { CL_R, "CL_R" },
         { CL_A, "CL_A" },
         { CL_Rx, "CL_Rx" },
@@ -222,8 +237,8 @@
         { CL_RGBA, "CL_RGBA" },
         { CL_BGRA, "CL_BGRA" },
         { CL_ARGB, "CL_ARGB" },
         { CL_DEPTH, "CL_DEPTH" },
         { CL_sRGB, "CL_sRGB" },
@@ -239,19 +254,18 @@
-    for (size_t i = 0; i < sizeof(orderNames) / sizeof(orderNames[0]); i++)
+    for( size_t i = 0; i < sizeof( orderNames ) / sizeof( orderNames[ 0 ] ); i++ )
-        if (strcmp(orderNames[i].name, name) == 0
-            || strcmp(orderNames[i].name + 3, name) == 0)
-            return orderNames[i].order;
+        if( strcmp( orderNames[ i ].name, name ) == 0 || strcmp( orderNames[ i ].name + 3, name ) == 0 )
+            return orderNames[ i ].order;
     return (cl_channel_order)-1;
-int is_format_signed(const cl_image_format *format)
+int is_format_signed( const cl_image_format *format )
-    switch (format->image_channel_data_type)
+    switch( format->image_channel_data_type )
         case CL_SNORM_INT8:
         case CL_SIGNED_INT8:
@@ -265,269 +279,163 @@
             return 1;
-        default: return 0;
+        default:
+            return 0;
-uint32_t get_pixel_size(const cl_image_format *format)
+size_t get_pixel_size( cl_image_format *format )
-    switch (format->image_channel_data_type)
-    {
-        case CL_SNORM_INT8:
-        case CL_UNORM_INT8:
-        case CL_SIGNED_INT8:
-        case CL_UNSIGNED_INT8: return get_format_channel_count(format);
+  switch( format->image_channel_data_type )
+  {
+    case CL_SNORM_INT8:
+    case CL_UNORM_INT8:
+    case CL_SIGNED_INT8:
+    case CL_UNSIGNED_INT8:
+      return get_format_channel_count( format );
-        case CL_SNORM_INT16:
-        case CL_UNORM_INT16:
-        case CL_SIGNED_INT16:
-        case CL_UNSIGNED_INT16:
-        case CL_HALF_FLOAT:
+    case CL_SNORM_INT16:
+    case CL_UNORM_INT16:
+    case CL_SIGNED_INT16:
+    case CL_UNSIGNED_INT16:
+    case CL_HALF_FLOAT:
+#ifdef  CL_SFIXED14_APPLE
         case CL_SFIXED14_APPLE:
-            return get_format_channel_count(format) * sizeof(cl_ushort);
+      return get_format_channel_count( format ) * sizeof( cl_ushort );
-        case CL_SIGNED_INT32:
-        case CL_UNSIGNED_INT32:
-            return get_format_channel_count(format) * sizeof(cl_int);
+    case CL_SIGNED_INT32:
+    case CL_UNSIGNED_INT32:
+      return get_format_channel_count( format ) * sizeof( cl_int );
-        case CL_UNORM_SHORT_565:
-        case CL_UNORM_SHORT_555:
+    case CL_UNORM_SHORT_565:
+    case CL_UNORM_SHORT_555:
-        case CL_UNORM_SHORT_565_REV:
-        case CL_UNORM_SHORT_555_REV:
+    case CL_UNORM_SHORT_565_REV:
+    case CL_UNORM_SHORT_555_REV:
-            return 2;
+      return 2;
-        case CL_UNORM_INT_8888:
-        case CL_UNORM_INT_8888_REV: return 4;
+    case CL_UNORM_INT_8888:
+    case CL_UNORM_INT_8888_REV:
+      return 4;
-        case CL_UNORM_INT_101010:
+    case CL_UNORM_INT_101010:
-        case CL_UNORM_INT_101010_REV:
+    case CL_UNORM_INT_101010_REV:
-            return 4;
+      return 4;
-        case CL_FLOAT:
-            return get_format_channel_count(format) * sizeof(cl_float);
+    case CL_FLOAT:
+      return get_format_channel_count( format ) * sizeof( cl_float );
-        default: return 0;
-    }
+    default:
+      return 0;
+  }
-uint32_t next_power_of_two(uint32_t v)
+int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
-    v--;
-    v |= v >> 1;
-    v |= v >> 2;
-    v |= v >> 4;
-    v |= v >> 8;
-    v |= v >> 16;
-    v++;
-    return v;
-uint32_t get_pixel_alignment(const cl_image_format *format)
-    return next_power_of_two(get_pixel_size(format));
-int get_8_bit_image_format(cl_context context, cl_mem_object_type objType,
-                           cl_mem_flags flags, size_t channelCount,
-                           cl_image_format *outFormat)
-    cl_image_format formatList[128];
+    cl_image_format formatList[ 128 ];
     unsigned int outFormatCount, i;
     int error;
     /* Make sure each image format is supported */
-    if ((error = clGetSupportedImageFormats(context, flags, objType, 128,
-                                            formatList, &outFormatCount)))
-        return error;
+    if ((error = clGetSupportedImageFormats( context, flags, objType, 128, formatList, &outFormatCount )))
+    return error;
     /* Look for one that is an 8-bit format */
-    for (i = 0; i < outFormatCount; i++)
+    for( i = 0; i < outFormatCount; i++ )
-        if (formatList[i].image_channel_data_type == CL_SNORM_INT8
-            || formatList[i].image_channel_data_type == CL_UNORM_INT8
-            || formatList[i].image_channel_data_type == CL_SIGNED_INT8
-            || formatList[i].image_channel_data_type == CL_UNSIGNED_INT8)
+        if( formatList[ i ].image_channel_data_type == CL_SNORM_INT8 ||
+       formatList[ i ].image_channel_data_type == CL_UNORM_INT8 ||
+           formatList[ i ].image_channel_data_type == CL_SIGNED_INT8 ||
+           formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT8 )
-            if (!channelCount
-                || (channelCount
-                    && (get_format_channel_count(&formatList[i])
-                        == channelCount)))
-            {
-                *outFormat = formatList[i];
-                return 0;
-            }
+      if ( !channelCount || ( channelCount && ( get_format_channel_count( &formatList[ i ] ) == channelCount ) ) )
+      {
+        *outFormat = formatList[ i ];
+        return 0;
+      }
     return -1;
-int get_32_bit_image_format(cl_context context, cl_mem_object_type objType,
-                            cl_mem_flags flags, size_t channelCount,
-                            cl_image_format *outFormat)
+int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
-    cl_image_format formatList[128];
+    cl_image_format formatList[ 128 ];
     unsigned int outFormatCount, i;
     int error;
-    /* Make sure each image format is supported */
-    if ((error = clGetSupportedImageFormats(context, flags, objType, 128,
-                                            formatList, &outFormatCount)))
-        return error;
+  /* Make sure each image format is supported */
+  if ((error = clGetSupportedImageFormats( context, flags, objType, 128, formatList, &outFormatCount )))
+    return error;
-    /* Look for one that is an 8-bit format */
-    for (i = 0; i < outFormatCount; i++)
+  /* Look for one that is an 8-bit format */
+  for( i = 0; i < outFormatCount; i++ )
+  {
+        if( formatList[ i ].image_channel_data_type == CL_UNORM_INT_101010 ||
+            formatList[ i ].image_channel_data_type == CL_FLOAT ||
+            formatList[ i ].image_channel_data_type == CL_SIGNED_INT32 ||
+            formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT32 )
-        if (formatList[i].image_channel_data_type == CL_UNORM_INT_101010
-            || formatList[i].image_channel_data_type == CL_FLOAT
-            || formatList[i].image_channel_data_type == CL_SIGNED_INT32
-            || formatList[i].image_channel_data_type == CL_UNSIGNED_INT32)
-        {
-            if (!channelCount
-                || (channelCount
-                    && (get_format_channel_count(&formatList[i])
-                        == channelCount)))
-            {
-                *outFormat = formatList[i];
-                return 0;
-            }
-        }
+      if ( !channelCount || ( channelCount && ( get_format_channel_count( &formatList[ i ] ) == channelCount ) ) )
+      {
+        *outFormat = formatList[ i ];
+        return 0;
+      }
+    }
     return -1;
-void print_first_pixel_difference_error(size_t where, const char *sourcePixel,
-                                        const char *destPixel,
-                                        image_descriptor *imageInfo, size_t y,
-                                        size_t thirdDim)
+int random_log_in_range( int minV, int maxV, MTdata d  )
-    size_t pixel_size = get_pixel_size(imageInfo->format);
-    log_error("ERROR: Scanline %d did not verify for image size %d,%d,%d "
-              "pitch %d (extra %d bytes)\n",
-              (int)y, (int)imageInfo->width, (int)imageInfo->height,
-              (int)thirdDim, (int)imageInfo->rowPitch,
-              (int)imageInfo->rowPitch
-                  - (int)imageInfo->width * (int)pixel_size);
-    log_error("Failed at column: %ld   ", where);
-    switch (pixel_size)
-    {
-        case 1:
-            log_error("*0x%2.2x vs. 0x%2.2x\n", ((cl_uchar *)sourcePixel)[0],
-                      ((cl_uchar *)destPixel)[0]);
-            break;
-        case 2:
-            log_error("*0x%4.4x vs. 0x%4.4x\n", ((cl_ushort *)sourcePixel)[0],
-                      ((cl_ushort *)destPixel)[0]);
-            break;
-        case 3:
-            log_error("*{0x%2.2x, 0x%2.2x, 0x%2.2x} vs. "
-                      "{0x%2.2x, 0x%2.2x, 0x%2.2x}\n",
-                      ((cl_uchar *)sourcePixel)[0],
-                      ((cl_uchar *)sourcePixel)[1],
-                      ((cl_uchar *)sourcePixel)[2], ((cl_uchar *)destPixel)[0],
-                      ((cl_uchar *)destPixel)[1], ((cl_uchar *)destPixel)[2]);
-            break;
-        case 4:
-            log_error("*0x%8.8x vs. 0x%8.8x\n", ((cl_uint *)sourcePixel)[0],
-                      ((cl_uint *)destPixel)[0]);
-            break;
-        case 6:
-            log_error(
-                "*{0x%4.4x, 0x%4.4x, 0x%4.4x} vs. "
-                "{0x%4.4x, 0x%4.4x, 0x%4.4x}\n",
-                ((cl_ushort *)sourcePixel)[0], ((cl_ushort *)sourcePixel)[1],
-                ((cl_ushort *)sourcePixel)[2], ((cl_ushort *)destPixel)[0],
-                ((cl_ushort *)destPixel)[1], ((cl_ushort *)destPixel)[2]);
-            break;
-        case 8:
-            log_error("*0x%16.16llx vs. 0x%16.16llx\n",
-                      ((cl_ulong *)sourcePixel)[0], ((cl_ulong *)destPixel)[0]);
-            break;
-        case 12:
-            log_error("*{0x%8.8x, 0x%8.8x, 0x%8.8x} vs. "
-                      "{0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
-                      ((cl_uint *)sourcePixel)[0], ((cl_uint *)sourcePixel)[1],
-                      ((cl_uint *)sourcePixel)[2], ((cl_uint *)destPixel)[0],
-                      ((cl_uint *)destPixel)[1], ((cl_uint *)destPixel)[2]);
-            break;
-        case 16:
-            log_error("*{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x} vs. "
-                      "{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
-                      ((cl_uint *)sourcePixel)[0], ((cl_uint *)sourcePixel)[1],
-                      ((cl_uint *)sourcePixel)[2], ((cl_uint *)sourcePixel)[3],
-                      ((cl_uint *)destPixel)[0], ((cl_uint *)destPixel)[1],
-                      ((cl_uint *)destPixel)[2], ((cl_uint *)destPixel)[3]);
-            break;
-        default:
-            log_error("Don't know how to print pixel size of %ld\n",
-                      pixel_size);
-            break;
-    }
-int random_log_in_range(int minV, int maxV, MTdata d)
-    double v = log2(((double)genrand_int32(d) / (double)0xffffffff) + 1);
-    int iv = (int)((float)(maxV - minV) * v);
+    double v = log2( ( (double)genrand_int32(d) / (double)0xffffffff ) + 1 );
+    int iv = (int)( (float)( maxV - minV ) * v );
     return iv + minV;
 // Define the addressing functions
-typedef int (*AddressFn)(int value, size_t maxValue);
+typedef int (*AddressFn)( int value, size_t maxValue );
-int NoAddressFn(int value, size_t maxValue) { return value; }
-int RepeatAddressFn(int value, size_t maxValue)
+int         NoAddressFn( int value, size_t maxValue )               { return value; }
+int         RepeatAddressFn( int value, size_t maxValue )
-    if (value < 0)
+    if( value < 0 )
         value += (int)maxValue;
-    else if (value >= (int)maxValue)
+    else if( value >= (int)maxValue )
         value -= (int)maxValue;
     return value;
-int MirroredRepeatAddressFn(int value, size_t maxValue)
+int         MirroredRepeatAddressFn( int value, size_t maxValue )
-    if (value < 0)
-        value = 0;
-    else if ((size_t)value >= maxValue)
-        value = (int)(maxValue - 1);
+    if( value < 0 )
+        value  = 0;
+    else if( (size_t) value >= maxValue )
+        value = (int) (maxValue - 1);
     return value;
-int ClampAddressFn(int value, size_t maxValue)
-    return (value < -1) ? -1
-                        : ((value > (cl_long)maxValue) ? (int)maxValue : value);
-int ClampToEdgeNearestFn(int value, size_t maxValue)
-    return (value < 0)
-        ? 0
-        : (((size_t)value > maxValue - 1) ? (int)maxValue - 1 : value);
-AddressFn ClampToEdgeLinearFn = ClampToEdgeNearestFn;
+int         ClampAddressFn( int value, size_t maxValue )            { return ( value < -1 ) ? -1 : ( ( value > (cl_long) maxValue ) ? (int)maxValue : value ); }
+int         ClampToEdgeNearestFn( int value, size_t maxValue )  { return ( value < 0 ) ? 0 : ( ( (size_t)value > maxValue - 1 ) ? (int)maxValue - 1 : value ); }
+AddressFn   ClampToEdgeLinearFn                                                 = ClampToEdgeNearestFn;
-// Note: normalized coords get repeated in normalized space, not unnormalized
-// space! hence the special case here
+// Note: normalized coords get repeated in normalized space, not unnormalized space! hence the special case here
 volatile float gFloatHome;
-float RepeatNormalizedAddressFn(float fValue, size_t maxValue)
+float           RepeatNormalizedAddressFn( float fValue, size_t maxValue )
 #ifndef _MSC_VER // Use original if not the VS compiler.
     // General computation for repeat
-    return (fValue - floorf(fValue)) * (float)maxValue; // Reduce to [0, 1.f]
+    return (fValue - floorf( fValue )) * (float) maxValue; // Reduce to [0, 1.f]
 #else // Otherwise, use this instead:
     // Home the subtraction to a float to break up the sequence of x87
     // instructions emitted by the VS compiler.
@@ -536,94 +444,91 @@
-float MirroredRepeatNormalizedAddressFn(float fValue, size_t maxValue)
+float           MirroredRepeatNormalizedAddressFn( float fValue, size_t maxValue )
-    // Round to nearest multiple of two.
-    // Note halfway values flip flop here due to rte, but they both end up
-    // pointing the same place at the end of the day.
-    float s_prime = 2.0f * rintf(fValue * 0.5f);
+    // Round to nearest multiple of two
+    float s_prime = 2.0f * rintf( fValue * 0.5f );        // Note halfway values flip flop here due to rte, but they both end up pointing the same place at the end of the day
     // Reduce to [-1, 1], Apply mirroring -> [0, 1]
-    s_prime = fabsf(fValue - s_prime);
+    s_prime = fabsf( fValue - s_prime );
     // un-normalize
-    return s_prime * (float)maxValue;
+    return s_prime * (float) maxValue;
 struct AddressingTable
-        ct_assert(CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2);
+        ct_assert( ( CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6 ) );
+        ct_assert( CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2 );
-              [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = NoAddressFn;
-              [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = NoAddressFn;
-              [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = RepeatAddressFn;
-              [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = RepeatAddressFn;
-              [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = ClampToEdgeNearestFn;
-              [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = ClampToEdgeLinearFn;
-              [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = ClampAddressFn;
-              [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = ClampAddressFn;
-              [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = MirroredRepeatAddressFn;
-              [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = MirroredRepeatAddressFn;
+        mTable[ CL_ADDRESS_NONE - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ]             = NoAddressFn;
+        mTable[ CL_ADDRESS_REPEAT - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ]           = RepeatAddressFn;
+        mTable[ CL_ADDRESS_CLAMP - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ]           = ClampAddressFn;
+        mTable[ CL_ADDRESS_CLAMP - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ]            = ClampAddressFn;
-    AddressFn operator[](image_sampler_data *sampler)
+    AddressFn operator[]( image_sampler_data *sampler )
-        return mTable[(int)sampler->addressing_mode - CL_ADDRESS_NONE]
-                     [(int)sampler->filter_mode - CL_FILTER_NEAREST];
+        return mTable[ (int)sampler->addressing_mode - CL_ADDRESS_NONE ][ (int)sampler->filter_mode - CL_FILTER_NEAREST ];
-    AddressFn mTable[6][2];
+    AddressFn mTable[ 6 ][ 2 ];
-static AddressingTable sAddressingTable;
+static AddressingTable  sAddressingTable;
-bool is_sRGBA_order(cl_channel_order image_channel_order)
-    switch (image_channel_order)
-    {
+bool is_sRGBA_order(cl_channel_order image_channel_order){
+    switch (image_channel_order) {
         case CL_sRGB:
         case CL_sRGBx:
         case CL_sRGBA:
-        case CL_sBGRA: return true;
-        default: return false;
+        case CL_sBGRA:
+            return true;
+        default:
+            return false;
 // Format helpers
-int has_alpha(const cl_image_format *format)
-    switch (format->image_channel_order)
-    {
-        case CL_R: return 0;
-        case CL_A: return 1;
-        case CL_Rx: return 0;
-        case CL_RG: return 0;
-        case CL_RA: return 1;
-        case CL_RGx: return 0;
+int has_alpha(cl_image_format *format) {
+    switch (format->image_channel_order) {
+        case CL_R:
+            return 0;
+        case CL_A:
+            return 1;
+        case CL_Rx:
+            return 0;
+        case CL_RG:
+            return 0;
+        case CL_RA:
+            return 1;
+        case CL_RGx:
+            return 0;
         case CL_RGB:
-        case CL_sRGB: return 0;
+        case CL_sRGB:
+            return 0;
         case CL_RGBx:
-        case CL_sRGBx: return 0;
-        case CL_RGBA: return 1;
-        case CL_BGRA: return 1;
-        case CL_ARGB: return 1;
-        case CL_ABGR: return 1;
-        case CL_INTENSITY: return 1;
-        case CL_LUMINANCE: return 0;
+        case CL_sRGBx:
+            return 0;
+        case CL_RGBA:
+            return 1;
+        case CL_BGRA:
+            return 1;
+        case CL_ARGB:
+            return 1;
+        case CL_INTENSITY:
+            return 1;
+        case CL_LUMINANCE:
+            return 0;
 #ifdef CL_BGR1_APPLE
         case CL_BGR1_APPLE: return 1;
@@ -631,278 +536,235 @@
         case CL_1RGB_APPLE: return 1;
         case CL_sRGBA:
-        case CL_sBGRA: return 1;
-        case CL_DEPTH: return 0;
+        case CL_sBGRA:
+            return 1;
+        case CL_DEPTH:
+            return 0;
-            log_error("Invalid image channel order: %d\n",
-                      format->image_channel_order);
+            log_error("Invalid image channel order: %d\n", format->image_channel_order);
             return 0;
-#define SWAP(_a, _b)                                                           \
-    do                                                                         \
-    {                                                                          \
-        _a ^= _b;                                                              \
-        _b ^= _a;                                                              \
-        _a ^= _b;                                                              \
-    } while (0)
+#define SWAP( _a, _b )      do{ _a ^= _b; _b ^= _a; _a ^= _b; }while(0)
 #ifndef MAX
-#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b))
+    #define MAX( _a, _b )   ((_a) > (_b) ? (_a) : (_b))
-void get_max_sizes(
-    size_t *numberOfSizes, const int maxNumberOfSizes, size_t sizes[][3],
-    size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
-    const cl_ulong maxIndividualAllocSize, // CL_DEVICE_MAX_MEM_ALLOC_SIZE
-    const cl_ulong maxTotalAllocSize, // CL_DEVICE_GLOBAL_MEM_SIZE
-    cl_mem_object_type image_type, const cl_image_format *format,
-    int usingMaxPixelSizeBuffer)
+void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
+                   size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
+                   const cl_ulong maxIndividualAllocSize,       // CL_DEVICE_MAX_MEM_ALLOC_SIZE
+                   const cl_ulong maxTotalAllocSize,            // CL_DEVICE_GLOBAL_MEM_SIZE
+                   cl_mem_object_type image_type, cl_image_format *format, int usingMaxPixelSizeBuffer) {
     bool is3D = (image_type == CL_MEM_OBJECT_IMAGE3D);
-    bool isArray = (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY
-                    || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY);
+    bool isArray = (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY);
     // Validate we have a reasonable max depth for 3D
-    if (is3D && maxDepth < 2)
-    {
-        log_error("ERROR: Requesting max image sizes for 3D images when max "
-                  "depth is < 2.\n");
+    if (is3D && maxDepth < 2) {
+        log_error("ERROR: Requesting max image sizes for 3D images when max depth is < 2.\n");
         *numberOfSizes = 0;
     // Validate we have a reasonable max array size for 1D & 2D image arrays
-    if (isArray && maxArraySize < 2)
-    {
-        log_error("ERROR: Requesting max image sizes for an image array when "
-                  "max array size is < 1.\n");
+    if (isArray && maxArraySize < 2) {
+        log_error("ERROR: Requesting max image sizes for an image array when max array size is < 1.\n");
         *numberOfSizes = 0;
-    // Reduce the maximum because we are trying to test the max image
-    // dimensions, not the memory allocation
+    // Reduce the maximum because we are trying to test the max image dimensions, not the memory allocation
     cl_ulong adjustedMaxTotalAllocSize = maxTotalAllocSize / 4;
     cl_ulong adjustedMaxIndividualAllocSize = maxIndividualAllocSize / 4;
-    log_info("Note: max individual allocation adjusted down from %gMB to %gMB "
-             "and max total allocation adjusted down from %gMB to %gMB.\n",
-             maxIndividualAllocSize / (1024.0 * 1024.0),
-             adjustedMaxIndividualAllocSize / (1024.0 * 1024.0),
-             maxTotalAllocSize / (1024.0 * 1024.0),
-             adjustedMaxTotalAllocSize / (1024.0 * 1024.0));
+    log_info("Note: max individual allocation adjusted down from %gMB to %gMB and max total allocation adjusted down from %gMB to %gMB.\n",
+             maxIndividualAllocSize/(1024.0*1024.0), adjustedMaxIndividualAllocSize/(1024.0*1024.0),
+             maxTotalAllocSize/(1024.0*1024.0), adjustedMaxTotalAllocSize/(1024.0*1024.0));
     // Cap our max allocation to 1.0GB.
-    // FIXME -- why?  In the interest of not taking a long time?  We should
-    // still test this stuff...
-    if (adjustedMaxTotalAllocSize > (cl_ulong)1024 * 1024 * 1024)
-    {
-        adjustedMaxTotalAllocSize = (cl_ulong)1024 * 1024 * 1024;
-        log_info("Limiting max total allocation size to %gMB (down from %gMB) "
-                 "for test.\n",
-                 adjustedMaxTotalAllocSize / (1024.0 * 1024.0),
-                 maxTotalAllocSize / (1024.0 * 1024.0));
+    // FIXME -- why?  In the interest of not taking a long time?  We should still test this stuff...
+    if (adjustedMaxTotalAllocSize > (cl_ulong)1024*1024*1024) {
+      adjustedMaxTotalAllocSize = (cl_ulong)1024*1024*1024;
+      log_info("Limiting max total allocation size to %gMB (down from %gMB) for test.\n",
+        adjustedMaxTotalAllocSize/(1024.0*1024.0), maxTotalAllocSize/(1024.0*1024.0));
     cl_ulong maxAllocSize = adjustedMaxIndividualAllocSize;
-    if (adjustedMaxTotalAllocSize < adjustedMaxIndividualAllocSize * 2)
-        maxAllocSize = adjustedMaxTotalAllocSize / 2;
+    if (adjustedMaxTotalAllocSize < adjustedMaxIndividualAllocSize*2)
+        maxAllocSize = adjustedMaxTotalAllocSize/2;
     size_t raw_pixel_size = get_pixel_size(format);
-    // If the test will be creating input (src) buffer of type int4 or float4,
-    // number of pixels will be governed by sizeof(int4 or float4) and not
-    // sizeof(dest fomat) Also if pixel size is 12 bytes i.e. RGB or RGBx, we
-    // adjust it to 16 bytes as GPUs has no concept of 3 channel images. GPUs
-    // expand these to four channel RGBA.
-    if (usingMaxPixelSizeBuffer || raw_pixel_size == 12) raw_pixel_size = 16;
+    // If the test will be creating input (src) buffer of type int4 or float4, number of pixels will be
+    // governed by sizeof(int4 or float4) and not sizeof(dest fomat)
+    // Also if pixel size is 12 bytes i.e. RGB or RGBx, we adjust it to 16 bytes as GPUs has no concept
+    // of 3 channel images. GPUs expand these to four channel RGBA.
+    if(usingMaxPixelSizeBuffer || raw_pixel_size == 12)
+      raw_pixel_size = 16;
     size_t max_pixels = (size_t)maxAllocSize / raw_pixel_size;
-    log_info("Maximums: [%ld x %ld x %ld], raw pixel size %lu bytes, "
-             "per-allocation limit %gMB.\n",
-             maxWidth, maxHeight, isArray ? maxArraySize : maxDepth,
-             raw_pixel_size, (maxAllocSize / (1024.0 * 1024.0)));
+    log_info("Maximums: [%ld x %ld x %ld], raw pixel size %lu bytes, per-allocation limit %gMB.\n",
+             maxWidth, maxHeight, isArray ? maxArraySize : maxDepth, raw_pixel_size, (maxAllocSize/(1024.0*1024.0)));
-    // Keep track of the maximum sizes for each dimension
-    size_t maximum_sizes[] = { maxWidth, maxHeight, maxDepth };
+  // Keep track of the maximum sizes for each dimension
+  size_t maximum_sizes[] = { maxWidth, maxHeight, maxDepth };
-    switch (image_type)
-    {
-            maximum_sizes[1] = maxArraySize;
-            maximum_sizes[2] = 1;
-            break;
-            maximum_sizes[2] = maxArraySize;
-            break;
-    }
+  switch (image_type) {
+      maximum_sizes[1] = maxArraySize;
+      maximum_sizes[2] = 1;
+      break;
+      maximum_sizes[2] = maxArraySize;
+      break;
+  }
-        // Given one fixed sized dimension, this code finds one or two other
-        // dimensions, both with very small size, such that the size does not
-        // exceed the maximum passed to this function
+  // Given one fixed sized dimension, this code finds one or two other dimensions,
+  // both with very small size, such that the size does not exceed the maximum
+  // passed to this function
-#if defined(__x86_64) || defined(__arm64__) || defined(__ppc64__)
-    size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 10, 11, 13, 15 };
+#if defined(__x86_64) || defined (__arm64__) || defined (__ppc64__)
+  size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 10, 11, 13, 15};
-    size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 11, 13 };
+  size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 11, 13};
-    static size_t other_size = 0;
-    enum
-    {
-        num_other_sizes = sizeof(other_sizes) / sizeof(size_t)
-    };
+  static size_t other_size = 0;
+  enum { num_other_sizes = sizeof(other_sizes)/sizeof(size_t) };
-    (*numberOfSizes) = 0;
+  (*numberOfSizes) = 0;
-    if (image_type == CL_MEM_OBJECT_IMAGE1D)
-    {
+  if (image_type == CL_MEM_OBJECT_IMAGE1D) {
-        double M = maximum_sizes[0];
+    double M = maximum_sizes[0];
-        // Store the size
-        sizes[(*numberOfSizes)][0] = (size_t)M;
-        sizes[(*numberOfSizes)][1] = 1;
-        sizes[(*numberOfSizes)][2] = 1;
-        ++(*numberOfSizes);
+    // Store the size
+    sizes[(*numberOfSizes)][0] = (size_t)M;
+    sizes[(*numberOfSizes)][1] = 1;
+    sizes[(*numberOfSizes)][2] = 1;
+    ++(*numberOfSizes);
+  }
+  else if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D) {
+    for (int fixed_dim=0;fixed_dim<2;++fixed_dim) {
+      // Determine the size of the fixed dimension
+      double M = maximum_sizes[fixed_dim];
+      double A = max_pixels;
+      int x0_dim = !fixed_dim;
+      double x0  = fmin(fmin(other_sizes[(other_size++)%num_other_sizes],A/M), maximum_sizes[x0_dim]);
+      // Store the size
+      sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
+      sizes[(*numberOfSizes)][x0_dim]    = (size_t)x0;
+      sizes[(*numberOfSizes)][2]         = 1;
+      ++(*numberOfSizes);
+  }
-    else if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY
-             || image_type == CL_MEM_OBJECT_IMAGE2D)
-    {
+  else if (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE3D) {
-        for (int fixed_dim = 0; fixed_dim < 2; ++fixed_dim)
-        {
+    // Iterate over dimensions, finding sizes for the non-fixed dimension
+    for (int fixed_dim=0;fixed_dim<3;++fixed_dim) {
-            // Determine the size of the fixed dimension
-            double M = maximum_sizes[fixed_dim];
-            double A = max_pixels;
+      // Determine the size of the fixed dimension
+      double M = maximum_sizes[fixed_dim];
+      double A = max_pixels;
-            int x0_dim = !fixed_dim;
-            double x0 =
-                fmin(fmin(other_sizes[(other_size++) % num_other_sizes], A / M),
-                     maximum_sizes[x0_dim]);
+      // Find two other dimensions, x0 and x1
+      int x0_dim = (fixed_dim == 0) ? 1 : 0;
+      int x1_dim = (fixed_dim == 2) ? 1 : 2;
-            // Store the size
-            sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
-            sizes[(*numberOfSizes)][x0_dim] = (size_t)x0;
-            sizes[(*numberOfSizes)][2] = 1;
-            ++(*numberOfSizes);
-        }
+      // Choose two other sizes for these dimensions
+      double x0 = fmin(fmin(A/M,maximum_sizes[x0_dim]),other_sizes[(other_size++)%num_other_sizes]);
+      // GPUs have certain restrictions on minimum width (row alignment) of images which has given us issues
+      // testing small widths in this test (say we set width to 3 for testing, and compute size based on this width and decide
+      // it fits within vram ... but GPU driver decides that, due to row alignment requirements, it has to use
+      // width of 16 which doesnt fit in vram). For this purpose we are not testing width < 16 for this test.
+      if(x0_dim == 0 && x0 < 16)
+        x0 = 16;
+      double x1 = fmin(fmin(A/M/x0,maximum_sizes[x1_dim]),other_sizes[(other_size++)%num_other_sizes]);
+      // Valid image sizes cannot be below 1. Due to the workaround for the xo_dim where x0 is overidden to 16
+      // there might not be enough space left for x1 dimension. This could be a fractional 0.x size that when cast to
+      // integer would result in a value 0. In these cases we clamp the size to a minimum of 1.
+      if ( x1 < 1 )
+        x1 = 1;
+      // M and x0 cannot be '0' as they derive from clDeviceInfo calls
+      assert(x0 > 0 && M > 0);
+      // Store the size
+      sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
+      sizes[(*numberOfSizes)][x0_dim]    = (size_t)x0;
+      sizes[(*numberOfSizes)][x1_dim]    = (size_t)x1;
+      ++(*numberOfSizes);
+  }
-    else if (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY
-             || image_type == CL_MEM_OBJECT_IMAGE3D)
-    {
-        // Iterate over dimensions, finding sizes for the non-fixed dimension
-        for (int fixed_dim = 0; fixed_dim < 3; ++fixed_dim)
-        {
-            // Determine the size of the fixed dimension
-            double M = maximum_sizes[fixed_dim];
-            double A = max_pixels;
-            // Find two other dimensions, x0 and x1
-            int x0_dim = (fixed_dim == 0) ? 1 : 0;
-            int x1_dim = (fixed_dim == 2) ? 1 : 2;
-            // Choose two other sizes for these dimensions
-            double x0 = fmin(fmin(A / M, maximum_sizes[x0_dim]),
-                             other_sizes[(other_size++) % num_other_sizes]);
-            // GPUs have certain restrictions on minimum width (row alignment)
-            // of images which has given us issues testing small widths in this
-            // test (say we set width to 3 for testing, and compute size based
-            // on this width and decide it fits within vram ... but GPU driver
-            // decides that, due to row alignment requirements, it has to use
-            // width of 16 which doesnt fit in vram). For this purpose we are
-            // not testing width < 16 for this test.
-            if (x0_dim == 0 && x0 < 16) x0 = 16;
-            double x1 = fmin(fmin(A / M / x0, maximum_sizes[x1_dim]),
-                             other_sizes[(other_size++) % num_other_sizes]);
-            // Valid image sizes cannot be below 1. Due to the workaround for
-            // the xo_dim where x0 is overidden to 16 there might not be enough
-            // space left for x1 dimension. This could be a fractional 0.x size
-            // that when cast to integer would result in a value 0. In these
-            // cases we clamp the size to a minimum of 1.
-            if (x1 < 1) x1 = 1;
-            // M and x0 cannot be '0' as they derive from clDeviceInfo calls
-            assert(x0 > 0 && M > 0);
-            // Store the size
-            sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
-            sizes[(*numberOfSizes)][x0_dim] = (size_t)x0;
-            sizes[(*numberOfSizes)][x1_dim] = (size_t)x1;
-            ++(*numberOfSizes);
-        }
+  // Log the results
+  for (int j=0; j<(int)(*numberOfSizes); j++) {
+    switch (image_type) {
+      case CL_MEM_OBJECT_IMAGE1D:
+        log_info(" size[%d] = [%ld] (%g MB image)\n",
+                 j, sizes[j][0], raw_pixel_size*sizes[j][0]*sizes[j][1]*sizes[j][2]/(1024.0*1024.0));
+        break;
+      case CL_MEM_OBJECT_IMAGE2D:
+        log_info(" size[%d] = [%ld %ld] (%g MB image)\n",
+                 j, sizes[j][0], sizes[j][1], raw_pixel_size*sizes[j][0]*sizes[j][1]*sizes[j][2]/(1024.0*1024.0));
+        break;
+      case CL_MEM_OBJECT_IMAGE3D:
+        log_info(" size[%d] = [%ld %ld %ld] (%g MB image)\n",
+                 j, sizes[j][0], sizes[j][1], sizes[j][2], raw_pixel_size*sizes[j][0]*sizes[j][1]*sizes[j][2]/(1024.0*1024.0));
+        break;
+  }
-    // Log the results
-    for (int j = 0; j < (int)(*numberOfSizes); j++)
-    {
-        switch (image_type)
-        {
-            case CL_MEM_OBJECT_IMAGE1D:
-                log_info(" size[%d] = [%ld] (%g MB image)\n", j, sizes[j][0],
-                         raw_pixel_size * sizes[j][0] * sizes[j][1]
-                             * sizes[j][2] / (1024.0 * 1024.0));
-                break;
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-            case CL_MEM_OBJECT_IMAGE2D:
-                log_info(" size[%d] = [%ld %ld] (%g MB image)\n", j,
-                         sizes[j][0], sizes[j][1],
-                         raw_pixel_size * sizes[j][0] * sizes[j][1]
-                             * sizes[j][2] / (1024.0 * 1024.0));
-                break;
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-            case CL_MEM_OBJECT_IMAGE3D:
-                log_info(" size[%d] = [%ld %ld %ld] (%g MB image)\n", j,
-                         sizes[j][0], sizes[j][1], sizes[j][2],
-                         raw_pixel_size * sizes[j][0] * sizes[j][1]
-                             * sizes[j][2] / (1024.0 * 1024.0));
-                break;
-        }
+float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler) {
+    if (sampler->filter_mode == CL_FILTER_NEAREST)
+        return 0.0f;
+    switch (format->image_channel_data_type) {
+        case CL_SNORM_INT8:
+            return 1.0f/127.0f;
+        case CL_UNORM_INT8:
+            return 1.0f/255.0f;
+        case CL_UNORM_INT16:
+            return 1.0f/65535.0f;
+        case CL_SNORM_INT16:
+            return 1.0f/32767.0f;
+        case CL_FLOAT:
+            return CL_FLT_MIN;
+#ifdef  CL_SFIXED14_APPLE
+        case CL_SFIXED14_APPLE:
+            return 0x1.0p-14f;
+        default:
+            return 0.0f;
-float get_max_absolute_error(const cl_image_format *format,
-                             image_sampler_data *sampler)
-    if (sampler->filter_mode == CL_FILTER_NEAREST) return 0.0f;
-    switch (format->image_channel_data_type)
-    {
-        case CL_SNORM_INT8: return 1.0f / 127.0f;
-        case CL_UNORM_INT8: return 1.0f / 255.0f;
-        case CL_UNORM_INT16: return 1.0f / 65535.0f;
-        case CL_SNORM_INT16: return 1.0f / 32767.0f;
-        case CL_FLOAT: return CL_FLT_MIN;
-        case CL_SFIXED14_APPLE: return 0x1.0p-14f;
-        default: return 0.0f;
-    }
-float get_max_relative_error(const cl_image_format *format,
-                             image_sampler_data *sampler, int is3D,
-                             int isLinearFilter)
+float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter )
     float maxError = 0.0f;
     float sampleCount = 1.0f;
-    if (isLinearFilter) sampleCount = is3D ? 8.0f : 4.0f;
+    if( isLinearFilter )
+        sampleCount =  is3D ? 8.0f : 4.0f;
-    // Note that the ULP is defined here as the unit in the last place of the
-    // maximum magnitude sample used for filtering.
+    // Note that the ULP is defined here as the unit in the last place of the maximum
+    // magnitude sample used for filtering.
     // Section 8.3
-    switch (format->image_channel_data_type)
+    switch( format->image_channel_data_type )
-        // The spec allows 2 ulps of error for normalized formats
+            // The spec allows 2 ulps of error for normalized formats
         case CL_SNORM_INT8:
         case CL_UNORM_INT8:
         case CL_SNORM_INT16:
@@ -910,42 +772,34 @@
         case CL_UNORM_SHORT_565:
         case CL_UNORM_SHORT_555:
         case CL_UNORM_INT_101010:
-            // Maximum sampling error for round to zero normalization based on
-            // multiplication by reciprocal (using reciprocal generated in
-            // round to +inf mode, so that 1.0 matches spec)
-            maxError = 2 * FLT_EPSILON * sampleCount;
+            maxError = 2*FLT_EPSILON*sampleCount;       // Maximum sampling error for round to zero normalization based on multiplication
+            // by reciprocal (using reciprocal generated in round to +inf mode, so that 1.0 matches spec)
-            // If the implementation supports these formats then it will have to
-            // allow rounding error here too, because not all 32-bit ints are
-            // exactly representable in float
+            // If the implementation supports these formats then it will have to allow rounding error here too,
+            // because not all 32-bit ints are exactly representable in float
         case CL_SIGNED_INT32:
-        case CL_UNSIGNED_INT32: maxError = 1 * FLT_EPSILON; break;
+        case CL_UNSIGNED_INT32:
+            maxError = 1*FLT_EPSILON;
+            break;
     // Section 8.2
-    if (sampler->addressing_mode == CL_ADDRESS_REPEAT
-        || sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT
-        || sampler->filter_mode != CL_FILTER_NEAREST
-        || sampler->normalized_coords)
-#if defined(__APPLE__)
+    if( sampler->addressing_mode == CL_ADDRESS_REPEAT || sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT || sampler->filter_mode != CL_FILTER_NEAREST || sampler->normalized_coords )
+#if defined( __APPLE__ )
-        if (sampler->filter_mode != CL_FILTER_NEAREST)
+        if( sampler->filter_mode != CL_FILTER_NEAREST )
             // The maximum
-            if (gDeviceType == CL_DEVICE_TYPE_GPU)
-                // Some GPUs ain't so accurate
-                maxError += MAKE_HEX_FLOAT(0x1.0p-4f, 0x1L, -4);
+            if( gDeviceType == CL_DEVICE_TYPE_GPU )
+                maxError += MAKE_HEX_FLOAT(0x1.0p-4f, 0x1L, -4);              // Some GPUs ain't so accurate
-                // The standard method of 2d linear filtering delivers 4.0 ulps
-                // of error in round to nearest (8 in rtz).
+                // The standard method of 2d linear filtering delivers 4.0 ulps of error in round to nearest (8 in rtz).
                 maxError += 4.0f * FLT_EPSILON;
-            // normalized coordinates will introduce some error into the
-            // fractional part of the address, affecting results
-            maxError += 4.0f * FLT_EPSILON;
+            maxError += 4.0f * FLT_EPSILON;    // normalized coordinates will introduce some error into the fractional part of the address, affecting results
@@ -971,570 +825,828 @@
     return maxError;
-size_t get_format_max_int(const cl_image_format *format)
+size_t get_format_max_int( cl_image_format *format )
-    switch (format->image_channel_data_type)
+    switch( format->image_channel_data_type )
         case CL_SNORM_INT8:
-        case CL_SIGNED_INT8: return 127;
+        case CL_SIGNED_INT8:
+            return 127;
         case CL_UNORM_INT8:
-        case CL_UNSIGNED_INT8: return 255;
+        case CL_UNSIGNED_INT8:
+            return 255;
         case CL_SNORM_INT16:
-        case CL_SIGNED_INT16: return 32767;
+        case CL_SIGNED_INT16:
+            return 32767;
         case CL_UNORM_INT16:
-        case CL_UNSIGNED_INT16: return 65535;
+        case CL_UNSIGNED_INT16:
+            return 65535;
-        case CL_SIGNED_INT32: return 2147483647L;
+        case CL_SIGNED_INT32:
+            return 2147483647L;
-        case CL_UNSIGNED_INT32: return 4294967295LL;
-        case CL_UNORM_SHORT_565:
-        case CL_UNORM_SHORT_555: return 31;
-        case CL_UNORM_INT_101010: return 1023;
-        case CL_HALF_FLOAT: return 1 << 10;
-        case CL_SFIXED14_APPLE: return 16384;
-        default: return 0;
-    }
-int get_format_min_int(const cl_image_format *format)
-    switch (format->image_channel_data_type)
-    {
-        case CL_SNORM_INT8:
-        case CL_SIGNED_INT8: return -128;
-        case CL_UNORM_INT8:
-        case CL_UNSIGNED_INT8: return 0;
-        case CL_SNORM_INT16:
-        case CL_SIGNED_INT16: return -32768;
-        case CL_UNORM_INT16:
-        case CL_UNSIGNED_INT16: return 0;
-        case CL_SIGNED_INT32: return -2147483648LL;
-        case CL_UNSIGNED_INT32: return 0;
+        case CL_UNSIGNED_INT32:
+            return 4294967295LL;
         case CL_UNORM_SHORT_565:
         case CL_UNORM_SHORT_555:
-        case CL_UNORM_INT_101010: return 0;
+            return 31;
+        case CL_UNORM_INT_101010:
+            return 1023;
+        case CL_HALF_FLOAT:
+            return 1<<10;
+        case CL_SFIXED14_APPLE:
+            return 16384;
+        default:
+            return 0;
+    }
+int get_format_min_int( cl_image_format *format )
+    switch( format->image_channel_data_type )
+    {
+        case CL_SNORM_INT8:
+        case CL_SIGNED_INT8:
+            return -128;
+        case CL_UNORM_INT8:
+        case CL_UNSIGNED_INT8:
+            return 0;
+        case CL_SNORM_INT16:
+        case CL_SIGNED_INT16:
+            return -32768;
+        case CL_UNORM_INT16:
+        case CL_UNSIGNED_INT16:
+            return 0;
+        case CL_SIGNED_INT32:
+            return -2147483648LL;
+        case CL_UNSIGNED_INT32:
+            return 0;
+        case CL_UNORM_SHORT_565:
+        case CL_UNORM_SHORT_555:
+        case CL_UNORM_INT_101010:
+            return 0;
         case CL_HALF_FLOAT: return -(1 << 10);
-        case CL_SFIXED14_APPLE: return -16384;
+        case CL_SFIXED14_APPLE:
+            return -16384;
-        default: return 0;
+        default:
+            return 0;
-cl_half convert_float_to_half(float f)
+float convert_half_to_float( unsigned short halfValue )
-    switch (gFloatToHalfRoundingMode)
+    // We have to take care of a few special cases, but in general, we just extract
+    // the same components from the half that exist in the float and re-stuff them
+    // For a description of the actual half format, see
+    // Note: we store these in 32-bit ints to make the bit manipulations easier later
+    int sign =     ( halfValue >> 15 ) & 0x0001;
+    int exponent = ( halfValue >> 10 ) & 0x001f;
+    int mantissa = ( halfValue )       & 0x03ff;
+    // Note: we use a union here to be able to access the bits of a float directly
+    union
-        case kRoundToNearestEven: return cl_half_from_float(f, CL_HALF_RTE);
-        case kRoundTowardZero: return cl_half_from_float(f, CL_HALF_RTZ);
+        unsigned int bits;
+        float floatValue;
+    } outFloat;
+    // Special cases first
+    if( exponent == 0 )
+    {
+        if( mantissa == 0 )
+        {
+            // If both exponent and mantissa are 0, the number is +/- 0
+            outFloat.bits  = sign << 31;
+            return outFloat.floatValue; // Already done!
+        }
+        // If exponent is 0, it's a denormalized number, so we renormalize it
+        // Note: this is not terribly efficient, but oh well
+        while( ( mantissa & 0x00000400 ) == 0 )
+        {
+            mantissa <<= 1;
+            exponent--;
+        }
+        // The first bit is implicit, so we take it off and inc the exponent accordingly
+        exponent++;
+        mantissa &= ~(0x00000400);
+    }
+    else if( exponent == 31 ) // Special-case "numbers"
+    {
+        // If the exponent is 31, it's a special case number (+/- infinity or NAN).
+        // If the mantissa is 0, it's infinity, else it's NAN, but in either case, the packing
+        // method is the same
+        outFloat.bits = ( sign << 31 ) | 0x7f800000 | ( mantissa << 13 );
+        return outFloat.floatValue;
+    }
+    // Plain ol' normalized number, so adjust to the ranges a 32-bit float expects and repack
+    exponent += ( 127 - 15 );
+    mantissa <<= 13;
+    outFloat.bits = ( sign << 31 ) | ( exponent << 23 ) | mantissa;
+    return outFloat.floatValue;
+cl_ushort convert_float_to_half( float f )
+    switch( gFloatToHalfRoundingMode )
+    {
+        case kRoundToNearestEven:
+            return float2half_rte( f );
+        case kRoundTowardZero:
+            return float2half_rtz( f );
-            log_error("ERROR: Test internal error -- unhandled or unknown "
-                      "float->half rounding mode.\n");
+            log_error( "ERROR: Test internal error -- unhandled or unknown float->half rounding mode.\n" );
             return 0xffff;
-cl_ulong get_image_size(image_descriptor const *imageInfo)
+cl_ushort float2half_rte( float f )
+    {
+    union{ float f; cl_uint u; } u = {f};
+    cl_uint sign = (u.u >> 16) & 0x8000;
+    float x = fabsf(f);
+    //Nan
+    if( x != x )
+    {
+        u.u >>= (24-11);
+        u.u &= 0x7fff;
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+                }
+    // overflow
+    if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) )
+        return 0x7c00 | sign;
+    // underflow
+    if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) )
+        return sign;    // The halfway case can return 0x0001 or 0. 0 is even.
+    // very small
+    if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
+        return sign | 1;
+    // half denormal
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125);
+        return sign | u.u;
+        }
+    u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13);
+    u.u &= 0x7f800000;
+    x += u.f;
+    u.f = x - u.f;
+    u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);
+    return (u.u >> (24-11)) | sign;
+    }
+cl_ushort float2half_rtz( float f )
+    {
+    union{ float f; cl_uint u; } u = {f};
+    cl_uint sign = (u.u >> 16) & 0x8000;
+    float x = fabsf(f);
+    //Nan
+    if( x != x )
+        {
+        u.u >>= (24-11);
+        u.u &= 0x7fff;
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+        }
+    // overflow
+    if( x >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) )
+        {
+        if( x == INFINITY )
+            return 0x7c00 | sign;
+        return 0x7bff | sign;
+        }
+    // underflow
+    if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
+        return sign;    // The halfway case can return 0x0001 or 0. 0 is even.
+    // half denormal
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
+        return (cl_ushort)((int) x | sign);
+    }
+    u.u &= 0xFFFFE000U;
+    u.u -= 0x38000000U;
+    return (u.u >> (24-11)) | sign;
+class TEST
+    TEST();
+static TEST t;
+void  __vstore_half_rte(float f, size_t index, uint16_t *p)
+    union{ unsigned int u; float f;} u;
+    u.f = f;
+    unsigned short r = (u.u >> 16) & 0x8000;
+    u.u &= 0x7fffffff;
+    if( u.u >= 0x33000000U )
+    {
+        if( u.u >= 0x47800000 )
+        {
+            if( u.u <= 0x7f800000 )
+                r |= 0x7c00;
+            else
+            {
+                r |= 0x7e00 | ( (u.u >> 13) & 0x3ff );
+            }
+        }
+        else
+        {
+            float x = u.f;
+            if( u.u < 0x38800000 )
+                u.u = 0x3f000000;
+            else
+                u.u += 0x06800000;
+            u.u &= 0x7f800000U;
+            x += u.f;
+            x -= u.f;
+            u.f = x * MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);
+            u.u >>= 13;
+            r |= (unsigned short) u.u;
+        }
+    }
+    ((unsigned short*)p)[index] = r;
+    return;
+    union
+    {
+        float f;
+        uint32_t i;
+    } test;
+    uint16_t control, myval;
+    log_info(" &&&&&&&&&&&&&&&&&&&&&&&&&&&& TESTING HALFS &&&&&&&&&&&&&&&&&&&&\n" );
+    test.i = 0;
+    do
+    {
+        if( ( test.i & 0xffffff ) == 0 )
+        {
+            if( ( test.i & 0xfffffff ) == 0 )
+                log_info( "*" );
+            else
+                log_info( "." );
+            fflush(stdout);
+        }
+        __vstore_half_rte( test.f, 0, &control );
+        myval = convert_float_to_half( test.f );
+        if( myval != control )
+        {
+            log_info( "\n******** ERROR: MyVal %04x control %04x source %12.24f\n", myval, control, test.f );
+            log_info( "         source bits: %08x   %a\n", test.i, test.f );
+            float t, c;
+            c = convert_half_to_float( control );
+            t = convert_half_to_float( myval );
+            log_info( "         converted control: %12.24f myval: %12.24f\n", c, t );
+        }
+        test.i++;
+    } while( test.i != 0 );
+    log_info("\n &&&&&&&&&&&&&&&&&&&&&&&&&&&& TESTING HALFS &&&&&&&&&&&&&&&&&&&&\n" );
+cl_ulong get_image_size( image_descriptor const *imageInfo )
     cl_ulong imageSize;
     // Assumes rowPitch and slicePitch are always correctly defined
-    if (/*gTestMipmaps*/ imageInfo->num_mip_levels > 1)
+    if ( /*gTestMipmaps*/ imageInfo->num_mip_levels > 1 )
-        imageSize = (size_t)compute_mipmapped_image_size(*imageInfo);
+      imageSize = (size_t) compute_mipmapped_image_size(*imageInfo);
-        switch (imageInfo->type)
-        {
-            case CL_MEM_OBJECT_IMAGE1D: imageSize = imageInfo->rowPitch; break;
-            case CL_MEM_OBJECT_IMAGE2D:
-                imageSize = imageInfo->height * imageInfo->rowPitch;
-                break;
-            case CL_MEM_OBJECT_IMAGE3D:
-                imageSize = imageInfo->depth * imageInfo->slicePitch;
-                break;
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                imageSize = imageInfo->arraySize * imageInfo->slicePitch;
-                break;
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                imageSize = imageInfo->arraySize * imageInfo->slicePitch;
-                break;
-            default:
-                log_error("ERROR: Cannot identify image type %x\n",
-                          imageInfo->type);
-                abort();
-        }
+      switch (imageInfo->type)
+      {
+      case CL_MEM_OBJECT_IMAGE1D:
+        imageSize = imageInfo->rowPitch;
+        break;
+      case CL_MEM_OBJECT_IMAGE2D:
+        imageSize = imageInfo->height * imageInfo->rowPitch;
+        break;
+      case CL_MEM_OBJECT_IMAGE3D:
+        imageSize = imageInfo->depth * imageInfo->slicePitch;
+        break;
+        imageSize = imageInfo->arraySize * imageInfo->slicePitch;
+        break;
+        imageSize = imageInfo->arraySize * imageInfo->slicePitch;
+        break;
+      default:
+        log_error("ERROR: Cannot identify image type %x\n", imageInfo->type);
+        abort();
+      }
     return imageSize;
-// Calculate image size in megabytes (strictly, mebibytes). Result is rounded
-// up.
-cl_ulong get_image_size_mb(image_descriptor const *imageInfo)
+// Calculate image size in megabytes (strictly, mebibytes). Result is rounded up.
+cl_ulong get_image_size_mb( image_descriptor const *imageInfo )
-    cl_ulong imageSize = get_image_size(imageInfo);
-    cl_ulong mb = imageSize / (1024 * 1024);
-    if (imageSize % (1024 * 1024) > 0)
+    cl_ulong imageSize = get_image_size( imageInfo );
+    cl_ulong mb = imageSize / ( 1024 * 1024 );
+    if ( imageSize % ( 1024 * 1024 ) > 0 )
         mb += 1;
-    return mb;
+    return  mb;
 uint64_t gRoundingStartValue = 0;
-void escape_inf_nan_values(char *data, size_t allocSize)
+void escape_inf_nan_values( char* data, size_t allocSize ) {
     // filter values with 8 not-quite-highest bits
     unsigned int *intPtr = (unsigned int *)data;
-    for (size_t i = 0; i<allocSize>> 2; i++)
+    for( size_t i = 0; i < allocSize >> 2; i++ )
-        if ((intPtr[i] & 0x7F800000) == 0x7F800000) intPtr[i] ^= 0x40000000;
+        if( ( intPtr[ i ] & 0x7F800000 ) == 0x7F800000 )
+            intPtr[ i ] ^= 0x40000000;
-    // Ditto with half floats (16-bit numbers with the 5 not-quite-highest bits
-    // = 0x7C00 are special)
+    // Ditto with half floats (16-bit numbers with the 5 not-quite-highest bits = 0x7C00 are special)
     unsigned short *shortPtr = (unsigned short *)data;
-    for (size_t i = 0; i<allocSize>> 1; i++)
+    for( size_t i = 0; i < allocSize >> 1; i++ )
-        if ((shortPtr[i] & 0x7C00) == 0x7C00) shortPtr[i] ^= 0x4000;
+        if( ( shortPtr[ i ] & 0x7C00 ) == 0x7C00 )
+            shortPtr[ i ] ^= 0x4000;
-char *generate_random_image_data(image_descriptor *imageInfo,
-                                 BufferOwningPtr<char> &P, MTdata d)
+char * generate_random_image_data( image_descriptor *imageInfo, BufferOwningPtr<char> &P, MTdata d )
-    size_t allocSize = get_image_size(imageInfo);
-    size_t pixelRowBytes = imageInfo->width * get_pixel_size(imageInfo->format);
+    size_t allocSize = get_image_size( imageInfo );
+    size_t pixelRowBytes = imageInfo->width * get_pixel_size( imageInfo->format );
     size_t i;
     if (imageInfo->num_mip_levels > 1)
-        allocSize = compute_mipmapped_image_size(*imageInfo);
+      allocSize = compute_mipmapped_image_size(*imageInfo);
-#if defined(__APPLE__)
+#if defined (__APPLE__ )
     char *data = NULL;
-    if (gDeviceType == CL_DEVICE_TYPE_CPU)
-    {
+    if (gDeviceType == CL_DEVICE_TYPE_CPU) {
         size_t mapSize = ((allocSize + 4095L) & -4096L) + 8192;
-        void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE,
-                         MAP_ANON | MAP_PRIVATE, 0, 0);
+        void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
         intptr_t data_end = (intptr_t)map + mapSize - 4096;
         data = (char *)(data_end - (intptr_t)allocSize);
         mprotect(map, 4096, PROT_NONE);
         mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
-        P.reset(data, map, mapSize, allocSize);
-    }
-    else
-    {
+        P.reset(data, map, mapSize,allocSize);
+    } else {
         data = (char *)malloc(allocSize);
-        P.reset(data, NULL, 0, allocSize);
+        P.reset(data,NULL,0,allocSize);
-    P.reset(NULL); // Free already allocated memory first, then try to allocate
-                   // new block.
-    char *data =
-        (char *)align_malloc(allocSize, get_pixel_alignment(imageInfo->format));
-    P.reset(data, NULL, 0, allocSize, true);
+    P.reset( NULL ); // Free already allocated memory first, then try to allocate new block.
+    char *data = (char *)align_malloc(allocSize, get_pixel_size(imageInfo->format));
+    P.reset(data,NULL,0,allocSize, true);
-    if (data == NULL)
-    {
-        log_error("ERROR: Unable to malloc %lu bytes for "
-                  "generate_random_image_data\n",
-                  allocSize);
-        return 0;
+    if (data == NULL) {
+      log_error( "ERROR: Unable to malloc %lu bytes for generate_random_image_data\n", allocSize );
+      return 0;
-    if (gTestRounding)
+    if( gTestRounding )
         // Special case: fill with a ramp from 0 to the size of the type
-        size_t typeSize = get_format_type_size(imageInfo->format);
-        switch (typeSize)
+        size_t typeSize = get_format_type_size( imageInfo->format );
+        switch( typeSize )
-            case 1: {
+            case 1:
+            {
                 char *ptr = data;
-                for (i = 0; i < allocSize; i++)
-                    ptr[i] = (cl_char)(i + gRoundingStartValue);
+                for( i = 0; i < allocSize; i++ )
+                    ptr[i] = (cl_char) (i + gRoundingStartValue);
-            break;
-            case 2: {
-                cl_short *ptr = (cl_short *)data;
-                for (i = 0; i < allocSize / 2; i++)
-                    ptr[i] = (cl_short)(i + gRoundingStartValue);
+                break;
+            case 2:
+            {
+                cl_short *ptr = (cl_short*) data;
+                for( i = 0; i < allocSize / 2; i++ )
+                    ptr[i] = (cl_short) (i +  gRoundingStartValue);
-            break;
-            case 4: {
-                cl_int *ptr = (cl_int *)data;
-                for (i = 0; i < allocSize / 4; i++)
-                    ptr[i] = (cl_int)(i + gRoundingStartValue);
+                break;
+            case 4:
+            {
+                cl_int *ptr = (cl_int*) data;
+                for( i = 0; i < allocSize / 4; i++ )
+                    ptr[i] = (cl_int) (i +  gRoundingStartValue);
-            break;
+                break;
-        // Note: inf or nan float values would cause problems, although we don't
-        // know this will actually be a float, so we just know what to look for
-        escape_inf_nan_values(data, allocSize);
+        // Note: inf or nan float values would cause problems, although we don't know this will
+        // actually be a float, so we just know what to look for
+        escape_inf_nan_values( data, allocSize );
         return data;
     // Otherwise, we should be able to just fill with random bits no matter what
-    cl_uint *p = (cl_uint *)data;
-    for (i = 0; i + 4 <= allocSize; i += 4) p[i / 4] = genrand_int32(d);
+    cl_uint *p = (cl_uint*) data;
+    for( i = 0; i + 4 <= allocSize; i += 4 )
+        p[ i / 4 ] = genrand_int32(d);
-    for (; i < allocSize; i++) data[i] = genrand_int32(d);
+    for( ; i < allocSize; i++ )
+        data[i] = genrand_int32(d);
-    // Note: inf or nan float values would cause problems, although we don't
-    // know this will actually be a float, so we just know what to look for
-    escape_inf_nan_values(data, allocSize);
+    // Note: inf or nan float values would cause problems, although we don't know this will
+    // actually be a float, so we just know what to look for
+    escape_inf_nan_values( data, allocSize );
-    if (/*!gTestMipmaps*/ imageInfo->num_mip_levels < 2)
+    if ( /*!gTestMipmaps*/ imageInfo->num_mip_levels < 2 )
-        // Fill unused edges with -1, NaN for float
-        if (imageInfo->rowPitch > pixelRowBytes)
-        {
-            size_t height = 0;
+      // Fill unused edges with -1, NaN for float
+      if (imageInfo->rowPitch > pixelRowBytes)
+      {
+          size_t height = 0;
-            switch (imageInfo->type)
-            {
-                case CL_MEM_OBJECT_IMAGE2D:
-                case CL_MEM_OBJECT_IMAGE3D:
-                case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                    height = imageInfo->height;
-                    break;
-                case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                    height = imageInfo->arraySize;
-                    break;
+          switch (imageInfo->type)
+          {
+              case CL_MEM_OBJECT_IMAGE2D:
+              case CL_MEM_OBJECT_IMAGE3D:
+              case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                  height = imageInfo->height;
+                  break;
+              case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                  height = imageInfo->arraySize;
+                  break;
             // Fill in the row padding regions
-            for (i = 0; i < height; i++)
+            for( i = 0; i < height; i++ )
                 size_t offset = i * imageInfo->rowPitch + pixelRowBytes;
                 size_t length = imageInfo->rowPitch - pixelRowBytes;
-                memset(data + offset, 0xff, length);
+                memset( data + offset, 0xff, length );
-        }
+      }
-        // Fill in the slice padding regions, if necessary:
+      // Fill in the slice padding regions, if necessary:
-        size_t slice_dimension = imageInfo->height;
-        if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
-        {
-            slice_dimension = imageInfo->arraySize;
-        }
+      size_t slice_dimension = imageInfo->height;
+      if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+          slice_dimension = imageInfo->arraySize;
+      }
-        if (imageInfo->slicePitch > slice_dimension * imageInfo->rowPitch)
-        {
-            size_t depth = 0;
-            switch (imageInfo->type)
-            {
-                case CL_MEM_OBJECT_IMAGE2D:
-                case CL_MEM_OBJECT_IMAGE3D: depth = imageInfo->depth; break;
-                case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                    depth = imageInfo->arraySize;
-                    break;
-            }
+      if (imageInfo->slicePitch > slice_dimension*imageInfo->rowPitch)
+      {
+          size_t depth = 0;
+          switch (imageInfo->type)
+          {
+            case CL_MEM_OBJECT_IMAGE2D:
+            case CL_MEM_OBJECT_IMAGE3D:
+                depth = imageInfo->depth;
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                depth = imageInfo->arraySize;
+                break;
+          }
-            for (i = 0; i < depth; i++)
-            {
-                size_t offset = i * imageInfo->slicePitch
-                    + slice_dimension * imageInfo->rowPitch;
-                size_t length = imageInfo->slicePitch
-                    - slice_dimension * imageInfo->rowPitch;
-                memset(data + offset, 0xff, length);
-            }
-        }
+          for( i = 0; i < depth; i++ )
+          {
+              size_t offset = i * imageInfo->slicePitch + slice_dimension*imageInfo->rowPitch;
+              size_t length = imageInfo->slicePitch - slice_dimension*imageInfo->rowPitch;
+              memset( data + offset, 0xff, length );
+          }
+      }
     return data;
-#define CLAMP_FLOAT(v) (fmaxf(fminf(v, 1.f), -1.f))
+#define CLAMP_FLOAT( v ) ( fmaxf( fminf( v, 1.f ), -1.f ) )
-void read_image_pixel_float(void *imageData, image_descriptor *imageInfo, int x,
-                            int y, int z, float *outData, int lod)
+void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                            int x, int y, int z, float *outData, int lod )
-    size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
-           depth_lod = imageInfo->depth;
+    size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth;
     size_t slice_pitch_lod = 0, row_pitch_lod = 0;
-    if (imageInfo->num_mip_levels > 1)
+    if ( imageInfo->num_mip_levels > 1 )
-        switch (imageInfo->type)
-        {
-            case CL_MEM_OBJECT_IMAGE3D:
-                depth_lod =
-                    (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
-            case CL_MEM_OBJECT_IMAGE2D:
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                height_lod =
-                    (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
-            default:
-                width_lod =
-                    (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-        }
-        row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
-        if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
-            slice_pitch_lod = row_pitch_lod;
-        else if (imageInfo->type == CL_MEM_OBJECT_IMAGE3D
-                 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
-            slice_pitch_lod = row_pitch_lod * height_lod;
+      switch(imageInfo->type)
+      {
+      case CL_MEM_OBJECT_IMAGE3D :
+        depth_lod = ( imageInfo->depth >> lod ) ? ( imageInfo->depth >> lod ) : 1;
+      case CL_MEM_OBJECT_IMAGE2D :
+        height_lod = ( imageInfo->height >> lod ) ? ( imageInfo->height >> lod ) : 1;
+      default :
+        width_lod = ( imageInfo->width >> lod ) ? ( imageInfo->width >> lod ) : 1;
+      }
+      row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
+      if ( imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY )
+        slice_pitch_lod = row_pitch_lod;
+      else if ( imageInfo->type == CL_MEM_OBJECT_IMAGE3D || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+        slice_pitch_lod = row_pitch_lod * height_lod;
-        row_pitch_lod = imageInfo->rowPitch;
-        slice_pitch_lod = imageInfo->slicePitch;
+      row_pitch_lod = imageInfo->rowPitch;
+      slice_pitch_lod = imageInfo->slicePitch;
-    if (x < 0 || y < 0 || z < 0 || x >= (int)width_lod
-        || (height_lod != 0 && y >= (int)height_lod)
-        || (depth_lod != 0 && z >= (int)depth_lod)
-        || (imageInfo->arraySize != 0 && z >= (int)imageInfo->arraySize))
+    if ( x < 0 || y < 0 || z < 0 || x >= (int)width_lod
+               || ( height_lod != 0 && y >= (int)height_lod )
+               || ( depth_lod != 0 && z >= (int)depth_lod )
+               || ( imageInfo->arraySize != 0 && z >= (int)imageInfo->arraySize ) )
-        outData[0] = outData[1] = outData[2] = outData[3] = 0;
-        if (!has_alpha(imageInfo->format)) outData[3] = 1;
+            outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0;
+            if (!has_alpha(imageInfo->format))
+                outData[3] = 1;
-    const cl_image_format *format = imageInfo->format;
+    cl_image_format *format = imageInfo->format;
     unsigned int i;
-    float tempData[4];
+    float tempData[ 4 ];
     // Advance to the right spot
     char *ptr = (char *)imageData;
-    size_t pixelSize = get_pixel_size(format);
+    size_t pixelSize = get_pixel_size( format );
     ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;
     // OpenCL only supports reading floats from certain formats
-    size_t channelCount = get_format_channel_count(format);
-    switch (format->image_channel_data_type)
+    size_t channelCount = get_format_channel_count( format );
+    switch( format->image_channel_data_type )
-        case CL_SNORM_INT8: {
+        case CL_SNORM_INT8:
+        {
             cl_char *dPtr = (cl_char *)ptr;
-            for (i = 0; i < channelCount; i++)
-                tempData[i] = CLAMP_FLOAT((float)dPtr[i] / 127.0f);
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = CLAMP_FLOAT( (float)dPtr[ i ] / 127.0f );
-        case CL_UNORM_INT8: {
+        case CL_UNORM_INT8:
+        {
             unsigned char *dPtr = (unsigned char *)ptr;
-            for (i = 0; i < channelCount; i++)
-            {
-                if ((is_sRGBA_order(imageInfo->format->image_channel_order))
-                    && i < 3) // only RGB need to be converted for sRGBA
-                    tempData[i] = (float)sRGBunmap((float)dPtr[i] / 255.0f);
+            for( i = 0; i < channelCount; i++ ) {
+                if((is_sRGBA_order(imageInfo->format->image_channel_order)) && i<3) // only RGB need to be converted for sRGBA
+                    tempData[ i ] = (float)sRGBunmap((float)dPtr[ i ] / 255.0f) ;
-                    tempData[i] = (float)dPtr[i] / 255.0f;
+                    tempData[ i ] = (float)dPtr[ i ] / 255.0f;
-        case CL_SIGNED_INT8: {
+        case CL_SIGNED_INT8:
+        {
             cl_char *dPtr = (cl_char *)ptr;
-            for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] =  (float)dPtr[ i ];
-        case CL_UNSIGNED_INT8: {
+        case CL_UNSIGNED_INT8:
+        {
             cl_uchar *dPtr = (cl_uchar *)ptr;
-            for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = (float) dPtr[ i ];
-        case CL_SNORM_INT16: {
+        case CL_SNORM_INT16:
+        {
             cl_short *dPtr = (cl_short *)ptr;
-            for (i = 0; i < channelCount; i++)
-                tempData[i] = CLAMP_FLOAT((float)dPtr[i] / 32767.0f);
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = CLAMP_FLOAT( (float)dPtr[ i ] / 32767.0f );
-        case CL_UNORM_INT16: {
+        case CL_UNORM_INT16:
+        {
             cl_ushort *dPtr = (cl_ushort *)ptr;
-            for (i = 0; i < channelCount; i++)
-                tempData[i] = (float)dPtr[i] / 65535.0f;
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = (float)dPtr[ i ] / 65535.0f;
-        case CL_SIGNED_INT16: {
+        case CL_SIGNED_INT16:
+        {
             cl_short *dPtr = (cl_short *)ptr;
-            for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = (float)dPtr[ i ];
-        case CL_UNSIGNED_INT16: {
+        case CL_UNSIGNED_INT16:
+        {
             cl_ushort *dPtr = (cl_ushort *)ptr;
-            for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = (float) dPtr[ i ];
-        case CL_HALF_FLOAT: {
-            cl_half *dPtr = (cl_half *)ptr;
-            for (i = 0; i < channelCount; i++)
-                tempData[i] = cl_half_to_float(dPtr[i]);
+        case CL_HALF_FLOAT:
+        {
+            cl_ushort *dPtr = (cl_ushort *)ptr;
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = convert_half_to_float( dPtr[ i ] );
-        case CL_SIGNED_INT32: {
+        case CL_SIGNED_INT32:
+        {
             cl_int *dPtr = (cl_int *)ptr;
-            for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = (float)dPtr[ i ];
-        case CL_UNSIGNED_INT32: {
+        case CL_UNSIGNED_INT32:
+        {
             cl_uint *dPtr = (cl_uint *)ptr;
-            for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = (float)dPtr[ i ];
-        case CL_UNORM_SHORT_565: {
+        case CL_UNORM_SHORT_565:
+        {
             cl_ushort *dPtr = (cl_ushort *)ptr;
-            tempData[0] = (float)(dPtr[0] >> 11) / (float)31;
-            tempData[1] = (float)((dPtr[0] >> 5) & 63) / (float)63;
-            tempData[2] = (float)(dPtr[0] & 31) / (float)31;
+            tempData[ 0 ] = (float)( dPtr[ 0 ] >> 11 ) / (float)31;
+            tempData[ 1 ] = (float)( ( dPtr[ 0 ] >> 5 ) & 63 ) / (float)63;
+            tempData[ 2 ] = (float)( dPtr[ 0 ] & 31 ) / (float)31;
-        case CL_UNORM_SHORT_555: {
+        case CL_UNORM_SHORT_555:
+        {
             cl_ushort *dPtr = (cl_ushort *)ptr;
-            tempData[0] = (float)((dPtr[0] >> 10) & 31) / (float)31;
-            tempData[1] = (float)((dPtr[0] >> 5) & 31) / (float)31;
-            tempData[2] = (float)(dPtr[0] & 31) / (float)31;
+            tempData[ 0 ] = (float)( ( dPtr[ 0 ] >> 10 ) & 31 ) / (float)31;
+            tempData[ 1 ] = (float)( ( dPtr[ 0 ] >> 5 ) & 31 ) / (float)31;
+            tempData[ 2 ] = (float)( dPtr[ 0 ] & 31 ) / (float)31;
-        case CL_UNORM_INT_101010: {
+        case CL_UNORM_INT_101010:
+        {
             cl_uint *dPtr = (cl_uint *)ptr;
-            tempData[0] = (float)((dPtr[0] >> 20) & 0x3ff) / (float)1023;
-            tempData[1] = (float)((dPtr[0] >> 10) & 0x3ff) / (float)1023;
-            tempData[2] = (float)(dPtr[0] & 0x3ff) / (float)1023;
+            tempData[ 0 ] = (float)( ( dPtr[ 0 ] >> 20 ) & 0x3ff ) / (float)1023;
+            tempData[ 1 ] = (float)( ( dPtr[ 0 ] >> 10 ) & 0x3ff ) / (float)1023;
+            tempData[ 2 ] = (float)( dPtr[ 0 ] & 0x3ff ) / (float)1023;
-        case CL_FLOAT: {
+        case CL_FLOAT:
+        {
             float *dPtr = (float *)ptr;
-            for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
+            for( i = 0; i < channelCount; i++ )
+                tempData[ i ] = (float)dPtr[ i ];
-        case CL_SFIXED14_APPLE: {
-            cl_ushort *dPtr = (cl_ushort *)ptr;
-            for (i = 0; i < channelCount; i++)
-                tempData[i] = ((int)dPtr[i] - 16384) * 0x1.0p-14f;
+#ifdef  CL_SFIXED14_APPLE
+        case CL_SFIXED14_APPLE:
+        {
+            cl_ushort *dPtr = (cl_ushort*) ptr;
+            for( i = 0; i < channelCount; i++ )
+                tempData[i] = ((int) dPtr[i] - 16384) * 0x1.0p-14f;
-    outData[0] = outData[1] = outData[2] = 0;
-    outData[3] = 1;
+    outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = 0;
+    outData[ 3 ] = 1;
-    switch (format->image_channel_order)
+    switch( format->image_channel_order )
-        case CL_A: outData[3] = tempData[0]; break;
+        case CL_A:
+            outData[ 3 ] = tempData[ 0 ];
+            break;
         case CL_R:
-        case CL_Rx: outData[0] = tempData[0]; break;
+        case CL_Rx:
+            outData[ 0 ] = tempData[ 0 ];
+            break;
         case CL_RA:
-            outData[0] = tempData[0];
-            outData[3] = tempData[1];
+            outData[ 0 ] = tempData[ 0 ];
+            outData[ 3 ] = tempData[ 1 ];
         case CL_RG:
         case CL_RGx:
-            outData[0] = tempData[0];
-            outData[1] = tempData[1];
+            outData[ 0 ] = tempData[ 0 ];
+            outData[ 1 ] = tempData[ 1 ];
         case CL_RGB:
         case CL_RGBx:
         case CL_sRGB:
         case CL_sRGBx:
-            outData[0] = tempData[0];
-            outData[1] = tempData[1];
-            outData[2] = tempData[2];
+            outData[ 0 ] = tempData[ 0 ];
+            outData[ 1 ] = tempData[ 1 ];
+            outData[ 2 ] = tempData[ 2 ];
         case CL_RGBA:
-            outData[0] = tempData[0];
-            outData[1] = tempData[1];
-            outData[2] = tempData[2];
-            outData[3] = tempData[3];
+            outData[ 0 ] = tempData[ 0 ];
+            outData[ 1 ] = tempData[ 1 ];
+            outData[ 2 ] = tempData[ 2 ];
+            outData[ 3 ] = tempData[ 3 ];
         case CL_ARGB:
-            outData[0] = tempData[1];
-            outData[1] = tempData[2];
-            outData[2] = tempData[3];
-            outData[3] = tempData[0];
-            break;
-        case CL_ABGR:
-            outData[0] = tempData[3];
-            outData[1] = tempData[2];
-            outData[2] = tempData[1];
-            outData[3] = tempData[0];
+            outData[ 0 ] = tempData[ 1 ];
+            outData[ 1 ] = tempData[ 2 ];
+            outData[ 2 ] = tempData[ 3 ];
+            outData[ 3 ] = tempData[ 0 ];
         case CL_BGRA:
         case CL_sBGRA:
-            outData[0] = tempData[2];
-            outData[1] = tempData[1];
-            outData[2] = tempData[0];
-            outData[3] = tempData[3];
+            outData[ 0 ] = tempData[ 2 ];
+            outData[ 1 ] = tempData[ 1 ];
+            outData[ 2 ] = tempData[ 0 ];
+            outData[ 3 ] = tempData[ 3 ];
         case CL_INTENSITY:
-            outData[0] = tempData[0];
-            outData[1] = tempData[0];
-            outData[2] = tempData[0];
-            outData[3] = tempData[0];
+            outData[ 0 ] = tempData[ 0 ];
+            outData[ 1 ] = tempData[ 0 ];
+            outData[ 2 ] = tempData[ 0 ];
+            outData[ 3 ] = tempData[ 0 ];
         case CL_LUMINANCE:
-            outData[0] = tempData[0];
-            outData[1] = tempData[0];
-            outData[2] = tempData[0];
+            outData[ 0 ] = tempData[ 0 ];
+            outData[ 1 ] = tempData[ 0 ];
+            outData[ 2 ] = tempData[ 0 ];
 #ifdef CL_1RGB_APPLE
         case CL_1RGB_APPLE:
-            outData[0] = tempData[1];
-            outData[1] = tempData[2];
-            outData[2] = tempData[3];
-            outData[3] = 1.0f;
+            outData[ 0 ] = tempData[ 1 ];
+            outData[ 1 ] = tempData[ 2 ];
+            outData[ 2 ] = tempData[ 3 ];
+            outData[ 3 ] = 1.0f;
 #ifdef CL_BGR1_APPLE
         case CL_BGR1_APPLE:
-            outData[0] = tempData[2];
-            outData[1] = tempData[1];
-            outData[2] = tempData[0];
-            outData[3] = 1.0f;
+            outData[ 0 ] = tempData[ 2 ];
+            outData[ 1 ] = tempData[ 1 ];
+            outData[ 2 ] = tempData[ 0 ];
+            outData[ 3 ] = 1.0f;
         case CL_sRGBA:
-            outData[0] = tempData[0];
-            outData[1] = tempData[1];
-            outData[2] = tempData[2];
-            outData[3] = tempData[3];
+            outData[ 0 ] = tempData[ 0 ];
+            outData[ 1 ] = tempData[ 1 ];
+            outData[ 2 ] = tempData[ 2 ];
+            outData[ 3 ] = tempData[ 3 ];
-        case CL_DEPTH: outData[0] = tempData[0]; break;
+        case CL_DEPTH:
+            outData[ 0 ] = tempData[ 0 ];
+            break;
             log_error("Invalid format:");
             print_header(format, true);
@@ -1542,122 +1654,105 @@
-void read_image_pixel_float(void *imageData, image_descriptor *imageInfo, int x,
-                            int y, int z, float *outData)
+void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                            int x, int y, int z, float *outData )
-    read_image_pixel_float(imageData, imageInfo, x, y, z, outData, 0);
+  read_image_pixel_float( imageData, imageInfo, x, y, z, outData, 0 );
-bool get_integer_coords(float x, float y, float z, size_t width, size_t height,
-                        size_t depth, image_sampler_data *imageSampler,
-                        image_descriptor *imageInfo, int &outX, int &outY,
-                        int &outZ)
-    return get_integer_coords_offset(x, y, z, 0.0f, 0.0f, 0.0f, width, height,
-                                     depth, imageSampler, imageInfo, outX, outY,
-                                     outZ);
+bool get_integer_coords( float x, float y, float z, size_t width, size_t height, size_t depth, image_sampler_data *imageSampler, image_descriptor *imageInfo, int &outX, int &outY, int &outZ ) {
+    return get_integer_coords_offset(x, y, z, 0.0f, 0.0f, 0.0f, width, height, depth, imageSampler, imageInfo, outX, outY, outZ);
-bool get_integer_coords_offset(float x, float y, float z, float xAddressOffset,
-                               float yAddressOffset, float zAddressOffset,
-                               size_t width, size_t height, size_t depth,
-                               image_sampler_data *imageSampler,
-                               image_descriptor *imageInfo, int &outX,
-                               int &outY, int &outZ)
+bool get_integer_coords_offset( float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                               size_t width, size_t height, size_t depth, image_sampler_data *imageSampler, image_descriptor *imageInfo, int &outX, int &outY, int &outZ )
-    AddressFn adFn = sAddressingTable[imageSampler];
+    AddressFn adFn = sAddressingTable[ imageSampler ];
-    float refX = floorf(x), refY = floorf(y), refZ = floorf(z);
+    float refX = floorf( x ), refY = floorf( y ), refZ = floorf( z );
     // Handle sampler-directed coordinate normalization + clamping.  Note that
     // the array coordinate for image array types is expected to be
     // unnormalized, and is clamped to 0..arraySize-1.
-    if (imageSampler->normalized_coords)
+    if( imageSampler->normalized_coords )
         switch (imageSampler->addressing_mode)
             case CL_ADDRESS_REPEAT:
-                x = RepeatNormalizedAddressFn(x, width);
-                if (height != 0)
-                {
+                x = RepeatNormalizedAddressFn( x, width );
+                if (height != 0) {
                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
-                        y = RepeatNormalizedAddressFn(y, height);
+                        y = RepeatNormalizedAddressFn( y, height );
-                if (depth != 0)
-                {
+                if (depth != 0) {
                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
-                        z = RepeatNormalizedAddressFn(z, depth);
+                        z = RepeatNormalizedAddressFn( z, depth );
-                if (xAddressOffset != 0.0)
-                {
+                if (xAddressOffset != 0.0) {
                     // Add in the offset
                     x += xAddressOffset;
                     // Handle wrapping
-                    if (x > width) x -= (float)width;
-                    if (x < 0) x += (float)width;
+                    if (x > width)
+                        x -= (float)width;
+                    if (x < 0)
+                        x += (float)width;
-                if ((yAddressOffset != 0.0)
-                    && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY))
-                {
+                if ( (yAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) ) {
                     // Add in the offset
                     y += yAddressOffset;
                     // Handle wrapping
-                    if (y > height) y -= (float)height;
-                    if (y < 0) y += (float)height;
+                    if (y > height)
+                        y -= (float)height;
+                    if (y < 0)
+                        y += (float)height;
-                if ((zAddressOffset != 0.0)
-                    && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY))
-                {
+                if ( (zAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) )  {
                     // Add in the offset
                     z += zAddressOffset;
                     // Handle wrapping
-                    if (z > depth) z -= (float)depth;
-                    if (z < 0) z += (float)depth;
+                    if (z > depth)
+                        z -= (float)depth;
+                    if (z < 0)
+                        z += (float)depth;
-                x = MirroredRepeatNormalizedAddressFn(x, width);
-                if (height != 0)
-                {
+                x = MirroredRepeatNormalizedAddressFn( x, width );
+                if (height != 0) {
                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
-                        y = MirroredRepeatNormalizedAddressFn(y, height);
+                        y = MirroredRepeatNormalizedAddressFn( y, height );
-                if (depth != 0)
-                {
+                if (depth != 0) {
                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
-                        z = MirroredRepeatNormalizedAddressFn(z, depth);
+                        z = MirroredRepeatNormalizedAddressFn( z, depth );
                 if (xAddressOffset != 0.0)
                     float temp = x + xAddressOffset;
-                    if (temp > (float)width)
-                        temp = (float)width - (temp - (float)width);
-                    x = fabsf(temp);
+                    if( temp > (float) width )
+                        temp = (float) width - (temp - (float) width );
+                    x = fabsf( temp );
-                if ((yAddressOffset != 0.0)
-                    && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY))
-                {
+                if ( (yAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) ) {
                     float temp = y + yAddressOffset;
-                    if (temp > (float)height)
-                        temp = (float)height - (temp - (float)height);
-                    y = fabsf(temp);
+                    if( temp > (float) height )
+                        temp = (float) height - (temp - (float) height );
+                    y = fabsf( temp );
-                if ((zAddressOffset != 0.0)
-                    && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY))
-                {
+                if ( (zAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) )  {
                     float temp = z + zAddressOffset;
-                    if (temp > (float)depth)
-                        temp = (float)depth - (temp - (float)depth);
-                    z = fabsf(temp);
+                    if( temp > (float) depth )
+                        temp = (float) depth - (temp - (float) depth );
+                    z = fabsf( temp );
-                // Also, remultiply to the original coords. This simulates any
-                // truncation in the pass to OpenCL
+                // Also, remultiply to the original coords. This simulates any truncation in
+                // the pass to OpenCL
                 x *= (float)width;
                 x += xAddressOffset;
@@ -1678,54 +1773,59 @@
     // At this point, we're dealing with non-normalized coordinates.
-    outX = adFn(floorf(x), width);
+    outX = adFn( floorf( x ), width );
     // 1D and 2D arrays require special care for the index coordinate:
-    switch (imageInfo->type)
-    {
+    switch (imageInfo->type) {
             outY = calculate_array_index(y, (float)imageInfo->arraySize - 1.0f);
             outZ = 0.0f; /* don't care! */
-            outY = adFn(floorf(y), height);
+            outY = adFn( floorf( y ), height );
             outZ = calculate_array_index(z, (float)imageInfo->arraySize - 1.0f);
             // legacy path:
-            if (height != 0) outY = adFn(floorf(y), height);
-            if (depth != 0) outZ = adFn(floorf(z), depth);
+            if (height != 0)
+                outY = adFn( floorf( y ), height );
+            if( depth != 0 )
+                outZ = adFn( floorf( z ), depth );
-    return !((int)refX == outX && (int)refY == outY && (int)refZ == outZ);
+    return !( (int)refX == outX && (int)refY == outY && (int)refZ == outZ );
-static float frac(float a) { return a - floorf(a); }
+static float frac(float a) {
+    return a - floorf(a);
-static inline void pixelMax(const float a[4], const float b[4], float *results);
-static inline void pixelMax(const float a[4], const float b[4], float *results)
+static inline void pixelMax( const float a[4], const float b[4], float *results );
+static inline void pixelMax( const float a[4], const float b[4], float *results )
-    for (int i = 0; i < 4; i++) results[i] = errMax(fabsf(a[i]), fabsf(b[i]));
+    for( int i = 0; i < 4; i++ )
+        results[i] = errMax( fabsf(a[i]), fabsf(b[i]) );
 // If containsDenorms is NULL, flush denorms to zero
 // if containsDenorms is not NULL, record whether there are any denorms
-static inline void check_for_denorms(float a[4], int *containsDenorms);
-static inline void check_for_denorms(float a[4], int *containsDenorms)
+static inline void  check_for_denorms(float a[4], int *containsDenorms );
+static inline void  check_for_denorms(float a[4], int *containsDenorms )
-    if (NULL == containsDenorms)
+    if( NULL == containsDenorms )
-        for (int i = 0; i < 4; i++)
+        for( int i = 0; i < 4; i++ )
-            if (IsFloatSubnormal(a[i])) a[i] = copysignf(0.0f, a[i]);
+            if( IsFloatSubnormal( a[i] ) )
+                a[i] = copysignf( 0.0f, a[i] );
-        for (int i = 0; i < 4; i++)
+        for( int i = 0; i < 4; i++ )
-            if (IsFloatSubnormal(a[i]))
+            if( IsFloatSubnormal( a[i] ) )
                 *containsDenorms = 1;
@@ -1734,14 +1834,13 @@
-inline float calculate_array_index(float coord, float extent)
+inline float calculate_array_index( float coord, float extent ) {
     // from Section 8.4 of the 1.2 Spec 'Selecting an Image from an Image Array'
     // given coordinate 'w' that represents an index:
     // layer_index = clamp( rint(w), 0, image_array_size - 1)
-    float ret = rintf(coord);
+    float ret = rintf( coord );
     ret = ret > extent ? extent : ret;
     ret = ret < 0.0f ? 0.0f : ret;
@@ -1756,60 +1855,49 @@
  * offset   - an addressing offset to be added to the coordinate
  * extent   - the max value for this coordinate (e.g. width for x)
-static float unnormalize_coordinate(const char *name, float coord, float offset,
-                                    float extent,
-                                    cl_addressing_mode addressing_mode,
-                                    int verbose)
+static float unnormalize_coordinate( const char* name, float coord,
+    float offset, float extent, cl_addressing_mode addressing_mode, int verbose )
     float ret = 0.0f;
-    switch (addressing_mode)
-    {
+    switch (addressing_mode) {
         case CL_ADDRESS_REPEAT:
-            ret = RepeatNormalizedAddressFn(coord, extent);
+            ret = RepeatNormalizedAddressFn( coord, extent );
-            if (verbose)
-            {
-                log_info("\tRepeat filter denormalizes %s (%f) to %f\n", name,
-                         coord, ret);
+            if ( verbose ) {
+                log_info( "\tRepeat filter denormalizes %s (%f) to %f\n",
+                    name, coord, ret );
-            if (offset != 0.0)
-            {
+            if (offset != 0.0) {
                 // Add in the offset, and handle wrapping.
                 ret += offset;
                 if (ret > extent) ret -= extent;
                 if (ret < 0.0) ret += extent;
-            if (verbose && offset != 0.0f)
-            {
-                log_info("\tAddress offset of %f added to get %f\n", offset,
-                         ret);
+            if (verbose && offset != 0.0f) {
+                log_info( "\tAddress offset of %f added to get %f\n", offset, ret );
-            ret = MirroredRepeatNormalizedAddressFn(coord, extent);
+            ret = MirroredRepeatNormalizedAddressFn( coord, extent );
-            if (verbose)
-            {
-                log_info(
-                    "\tMirrored repeat filter denormalizes %s (%f) to %f\n",
-                    name, coord, ret);
+            if ( verbose ) {
+                log_info( "\tMirrored repeat filter denormalizes %s (%f) to %f\n",
+                    name, coord, ret );
-            if (offset != 0.0)
-            {
+            if (offset != 0.0) {
                 float temp = ret + offset;
-                if (temp > extent) temp = extent - (temp - extent);
-                ret = fabsf(temp);
+                if( temp > extent )
+                    temp = extent - (temp - extent );
+                ret = fabsf( temp );
-            if (verbose && offset != 0.0f)
-            {
-                log_info("\tAddress offset of %f added to get %f\n", offset,
-                         ret);
+            if (verbose && offset != 0.0f) {
+                log_info( "\tAddress offset of %f added to get %f\n", offset, ret );
@@ -1817,134 +1905,107 @@
             ret = coord * extent;
-            if (verbose)
-            {
-                log_info("\tFilter denormalizes %s to %f (%f * %f)\n", name,
-                         ret, coord, extent);
+            if ( verbose ) {
+                log_info( "\tFilter denormalizes %s to %f (%f * %f)\n",
+                    name, ret, coord, extent);
             ret += offset;
-            if (verbose && offset != 0.0f)
-            {
-                log_info("\tAddress offset of %f added to get %f\n", offset,
-                         ret);
+            if (verbose && offset != 0.0f) {
+                log_info( "\tAddress offset of %f added to get %f\n", offset, ret );
     return ret;
-sample_image_pixel_float(void *imageData, image_descriptor *imageInfo, float x,
-                         float y, float z, image_sampler_data *imageSampler,
-                         float *outData, int verbose, int *containsDenorms)
-    return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f,
-                                           0.0f, 0.0f, imageSampler, outData,
-                                           verbose, containsDenorms);
+FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                    float x, float y, float z,
+                                    image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms ) {
+    return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData, verbose, containsDenorms);
 // returns max pixel value of the pixels touched
-FloatPixel sample_image_pixel_float(void *imageData,
-                                    image_descriptor *imageInfo, float x,
-                                    float y, float z,
-                                    image_sampler_data *imageSampler,
-                                    float *outData, int verbose,
-                                    int *containsDenorms, int lod)
-    return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f,
-                                           0.0f, 0.0f, imageSampler, outData,
-                                           verbose, containsDenorms, lod);
+FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                    float x, float y, float z,
+                                    image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms , int lod) {
+    return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData, verbose, containsDenorms, lod);
-FloatPixel sample_image_pixel_float_offset(
-    void *imageData, image_descriptor *imageInfo, float x, float y, float z,
-    float xAddressOffset, float yAddressOffset, float zAddressOffset,
-    image_sampler_data *imageSampler, float *outData, int verbose,
-    int *containsDenorms, int lod)
+FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
+                                           float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                           image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms , int lod)
-    AddressFn adFn = sAddressingTable[imageSampler];
+    AddressFn adFn = sAddressingTable[ imageSampler ];
     FloatPixel returnVal;
-    size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
-           depth_lod = imageInfo->depth;
+    size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth;
     size_t slice_pitch_lod = 0, row_pitch_lod = 0;
-    if (imageInfo->num_mip_levels > 1)
+    if ( imageInfo->num_mip_levels > 1 )
-        switch (imageInfo->type)
-        {
-            case CL_MEM_OBJECT_IMAGE3D:
-                depth_lod =
-                    (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
-            case CL_MEM_OBJECT_IMAGE2D:
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                height_lod =
-                    (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
-            default:
-                width_lod =
-                    (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-        }
-        row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
-        if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
-            slice_pitch_lod = row_pitch_lod;
-        else if (imageInfo->type == CL_MEM_OBJECT_IMAGE3D
-                 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
-            slice_pitch_lod = row_pitch_lod * height_lod;
+      switch(imageInfo->type)
+      {
+      case CL_MEM_OBJECT_IMAGE3D :
+        depth_lod = ( imageInfo->depth >> lod ) ? ( imageInfo->depth >> lod ) : 1;
+      case CL_MEM_OBJECT_IMAGE2D :
+        height_lod = ( imageInfo->height >> lod ) ? ( imageInfo->height >> lod ) : 1;
+      default :
+        width_lod = ( imageInfo->width >> lod ) ? ( imageInfo->width >> lod ) : 1;
+      }
+      row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
+      if ( imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY )
+        slice_pitch_lod = row_pitch_lod;
+      else if ( imageInfo->type == CL_MEM_OBJECT_IMAGE3D || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+        slice_pitch_lod = row_pitch_lod * height_lod;
-        slice_pitch_lod = imageInfo->slicePitch;
-        row_pitch_lod = imageInfo->rowPitch;
+      slice_pitch_lod = imageInfo->slicePitch;
+      row_pitch_lod = imageInfo->rowPitch;
-    if (containsDenorms) *containsDenorms = 0;
+    if( containsDenorms )
+        *containsDenorms = 0;
-    if (imageSampler->normalized_coords)
-    {
+    if( imageSampler->normalized_coords ) {
         // We need to unnormalize our coordinates differently depending on
         // the image type, but 'x' is always processed the same way.
         x = unnormalize_coordinate("x", x, xAddressOffset, (float)width_lod,
-                                   imageSampler->addressing_mode, verbose);
+            imageSampler->addressing_mode, verbose);
-        switch (imageInfo->type)
-        {
+        switch (imageInfo->type) {
-                // The image array types require special care:
+            // The image array types require special care:
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                 z = 0; // don't care -- unused for 1D arrays
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                y = unnormalize_coordinate(
-                    "y", y, yAddressOffset, (float)height_lod,
+                y = unnormalize_coordinate("y", y, yAddressOffset, (float)height_lod,
                     imageSampler->addressing_mode, verbose);
-                // Everybody else:
+            // Everybody else:
-                y = unnormalize_coordinate(
-                    "y", y, yAddressOffset, (float)height_lod,
+                y = unnormalize_coordinate("y", y, yAddressOffset, (float)height_lod,
                     imageSampler->addressing_mode, verbose);
-                z = unnormalize_coordinate(
-                    "z", z, zAddressOffset, (float)depth_lod,
+                z = unnormalize_coordinate("z", z, zAddressOffset, (float)depth_lod,
                     imageSampler->addressing_mode, verbose);
-    }
-    else if (verbose)
-    {
-        switch (imageInfo->type)
-        {
+    } else if ( verbose ) {
+        switch (imageInfo->type) {
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                 log_info("Starting coordinate: %f, array index %f\n", x, y);
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                log_info("Starting coordinate: %f, %f, array index %f\n", x, y,
-                         z);
+                log_info("Starting coordinate: %f, %f, array index %f\n", x, y, z);
             case CL_MEM_OBJECT_IMAGE1D:
             case CL_MEM_OBJECT_IMAGE1D_BUFFER:
@@ -1954,13 +2015,14 @@
                 log_info("Starting coordinate: %f, %f\n", x, y);
             case CL_MEM_OBJECT_IMAGE3D:
-            default: log_info("Starting coordinate: %f, %f, %f\n", x, y, z);
+            default:
+                log_info("Starting coordinate: %f, %f, %f\n", x, y, z);
     // At this point, we have unnormalized coordinates.
-    if (imageSampler->filter_mode == CL_FILTER_NEAREST)
+    if( imageSampler->filter_mode == CL_FILTER_NEAREST )
         int ix, iy, iz;
@@ -1968,50 +2030,42 @@
         // coordinates.  Note that the array cases again require special
         // care, per section 8.4 in the OpenCL 1.2 Specification.
-        ix = adFn(floorf(x), width_lod);
+        ix = adFn( floorf( x ), width_lod );
-        switch (imageInfo->type)
-        {
+        switch (imageInfo->type) {
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                iy =
-                    calculate_array_index(y, (float)(imageInfo->arraySize - 1));
+                iy = calculate_array_index( y, (float)(imageInfo->arraySize - 1) );
                 iz = 0;
-                if (verbose)
-                {
-                    log_info("\tArray index %f evaluates to %d\n", y, iy);
+                if( verbose ) {
+                  log_info("\tArray index %f evaluates to %d\n",y, iy );
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                iy = adFn(floorf(y), height_lod);
-                iz =
-                    calculate_array_index(z, (float)(imageInfo->arraySize - 1));
-                if (verbose)
-                {
-                    log_info("\tArray index %f evaluates to %d\n", z, iz);
+                iy = adFn( floorf( y ), height_lod );
+                iz = calculate_array_index( z, (float)(imageInfo->arraySize - 1) );
+                if( verbose ) {
+                    log_info("\tArray index %f evaluates to %d\n",z, iz );
-                iy = adFn(floorf(y), height_lod);
-                if (depth_lod != 0)
-                    iz = adFn(floorf(z), depth_lod);
+                iy = adFn( floorf( y ), height_lod );
+                if( depth_lod != 0 )
+                    iz = adFn( floorf( z ), depth_lod );
                     iz = 0;
-        if (verbose)
-        {
-            if (iz)
-                log_info(
-                    "\tReference integer coords calculated: { %d, %d, %d }\n",
-                    ix, iy, iz);
+        if( verbose ) {
+            if( iz )
+                log_info( "\tReference integer coords calculated: { %d, %d, %d }\n", ix, iy, iz );
-                log_info("\tReference integer coords calculated: { %d, %d }\n",
-                         ix, iy);
+                log_info( "\tReference integer coords calculated: { %d, %d }\n", ix, iy );
-        read_image_pixel_float(imageData, imageInfo, ix, iy, iz, outData, lod);
-        check_for_denorms(outData, containsDenorms);
-        for (int i = 0; i < 4; i++) returnVal.p[i] = fabsf(outData[i]);
+        read_image_pixel_float( imageData, imageInfo, ix, iy, iz, outData, lod );
+        check_for_denorms( outData, containsDenorms );
+        for( int i = 0; i < 4; i++ )
+            returnVal.p[i] = fabsf( outData[i] );
         return returnVal;
@@ -2023,23 +2077,19 @@
         // Image arrays can use 2D filtering, but require us to walk into the
         // image a certain number of slices before reading.
-        if (depth == 0 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY
-            || imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+        if( depth == 0 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY ||
+                          imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
             float array_index = 0;
             size_t layer_offset = 0;
-            if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
-            {
-                array_index =
-                    calculate_array_index(z, (float)(imageInfo->arraySize - 1));
+            if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+                array_index = calculate_array_index(z, (float)(imageInfo->arraySize - 1));
                 layer_offset = slice_pitch_lod * (size_t)array_index;
-            else if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
-            {
-                array_index =
-                    calculate_array_index(y, (float)(imageInfo->arraySize - 1));
+            else if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+                array_index = calculate_array_index(y, (float)(imageInfo->arraySize - 1));
                 layer_offset = slice_pitch_lod * (size_t)array_index;
                 // Set up y and height so that the filtering below is correct
@@ -2047,253 +2097,213 @@
                 height = 1;
-            int x1 = adFn(floorf(x - 0.5f), width);
+            int x1 = adFn( floorf( x - 0.5f ), width );
             int y1 = 0;
-            int x2 = adFn(floorf(x - 0.5f) + 1, width);
+            int x2 = adFn( floorf( x - 0.5f ) + 1, width );
             int y2 = 0;
-            if ((imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
-                && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
-                && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_BUFFER))
-            {
-                y1 = adFn(floorf(y - 0.5f), height);
-                y2 = adFn(floorf(y - 0.5f) + 1, height);
-            }
-            else
-            {
-                y = 0.5f;
+            if ((imageInfo->type != CL_MEM_OBJECT_IMAGE1D) &&
+                (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) &&
+                (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
+                y1 = adFn( floorf( y - 0.5f ), height );
+                y2 = adFn( floorf( y - 0.5f ) + 1, height );
+            } else {
+              y = 0.5f;
-            if (verbose)
-            {
-                log_info("\tActual integer coords used (i = floor(x-.5)): i0:{ "
-                         "%d, %d } and i1:{ %d, %d }\n",
-                         x1, y1, x2, y2);
-                log_info("\tArray coordinate is %f\n", array_index);
+            if( verbose ) {
+                log_info( "\tActual integer coords used (i = floor(x-.5)): i0:{ %d, %d } and i1:{ %d, %d }\n", x1, y1, x2, y2 );
+                log_info( "\tArray coordinate is %f\n", array_index);
             // Walk to beginning of the 'correct' slice, if needed.
-            char *imgPtr = ((char *)imageData) + layer_offset;
+            char* imgPtr = ((char*)imageData) + layer_offset;
-            float upLeft[4], upRight[4], lowLeft[4], lowRight[4];
+            float upLeft[ 4 ], upRight[ 4 ], lowLeft[ 4 ], lowRight[ 4 ];
             float maxUp[4], maxLow[4];
-            read_image_pixel_float(imgPtr, imageInfo, x1, y1, 0, upLeft, lod);
-            read_image_pixel_float(imgPtr, imageInfo, x2, y1, 0, upRight, lod);
-            check_for_denorms(upLeft, containsDenorms);
-            check_for_denorms(upRight, containsDenorms);
-            pixelMax(upLeft, upRight, maxUp);
-            read_image_pixel_float(imgPtr, imageInfo, x1, y2, 0, lowLeft, lod);
-            read_image_pixel_float(imgPtr, imageInfo, x2, y2, 0, lowRight, lod);
-            check_for_denorms(lowLeft, containsDenorms);
-            check_for_denorms(lowRight, containsDenorms);
-            pixelMax(lowLeft, lowRight, maxLow);
-            pixelMax(maxUp, maxLow, returnVal.p);
+            read_image_pixel_float( imgPtr, imageInfo, x1, y1, 0, upLeft, lod );
+            read_image_pixel_float( imgPtr, imageInfo, x2, y1, 0, upRight, lod );
+            check_for_denorms( upLeft, containsDenorms );
+            check_for_denorms( upRight, containsDenorms );
+            pixelMax( upLeft, upRight, maxUp );
+            read_image_pixel_float( imgPtr, imageInfo, x1, y2, 0, lowLeft, lod );
+            read_image_pixel_float( imgPtr, imageInfo, x2, y2, 0, lowRight, lod );
+            check_for_denorms( lowLeft, containsDenorms );
+            check_for_denorms( lowRight, containsDenorms );
+            pixelMax( lowLeft, lowRight, maxLow );
+            pixelMax( maxUp, maxLow, returnVal.p );
-            if (verbose)
+            if( verbose )
-                if (NULL == containsDenorms)
-                    log_info("\tSampled pixels (rgba order, denorms flushed to "
-                             "zero):\n");
+                if( NULL == containsDenorms )
+                    log_info( "\tSampled pixels (rgba order, denorms flushed to zero):\n" );
-                    log_info("\tSampled pixels (rgba order):\n");
-                log_info("\t\tp00: %f, %f, %f, %f\n", upLeft[0], upLeft[1],
-                         upLeft[2], upLeft[3]);
-                log_info("\t\tp01: %f, %f, %f, %f\n", upRight[0], upRight[1],
-                         upRight[2], upRight[3]);
-                log_info("\t\tp10: %f, %f, %f, %f\n", lowLeft[0], lowLeft[1],
-                         lowLeft[2], lowLeft[3]);
-                log_info("\t\tp11: %f, %f, %f, %f\n", lowRight[0], lowRight[1],
-                         lowRight[2], lowRight[3]);
+                    log_info( "\tSampled pixels (rgba order):\n" );
+                log_info( "\t\tp00: %f, %f, %f, %f\n", upLeft[0], upLeft[1], upLeft[2], upLeft[3] );
+                log_info( "\t\tp01: %f, %f, %f, %f\n", upRight[0], upRight[1], upRight[2], upRight[3] );
+                log_info( "\t\tp10: %f, %f, %f, %f\n", lowLeft[0], lowLeft[1], lowLeft[2], lowLeft[3] );
+                log_info( "\t\tp11: %f, %f, %f, %f\n", lowRight[0], lowRight[1], lowRight[2], lowRight[3] );
             bool printMe = false;
-            if (x1 <= 0 || x2 <= 0 || x1 >= (int)width - 1
-                || x2 >= (int)width - 1)
+            if( x1 <= 0 || x2 <= 0 || x1 >= (int)width-1 || x2 >= (int)width-1 )
                 printMe = true;
-            if (y1 <= 0 || y2 <= 0 || y1 >= (int)height - 1
-                || y2 >= (int)height - 1)
+            if( y1 <= 0 || y2 <= 0 || y1 >= (int)height-1 || y2 >= (int)height-1 )
                 printMe = true;
-            double weights[2][2];
+            double weights[ 2 ][ 2 ];
-            weights[0][0] = weights[0][1] = 1.0 - frac(x - 0.5f);
-            weights[1][0] = weights[1][1] = frac(x - 0.5f);
-            weights[0][0] *= 1.0 - frac(y - 0.5f);
-            weights[1][0] *= 1.0 - frac(y - 0.5f);
-            weights[0][1] *= frac(y - 0.5f);
-            weights[1][1] *= frac(y - 0.5f);
+            weights[ 0 ][ 0 ] = weights[ 0 ][ 1 ] = 1.0 - frac( x - 0.5f );
+            weights[ 1 ][ 0 ] = weights[ 1 ][ 1 ] = frac( x - 0.5f );
+            weights[ 0 ][ 0 ] *= 1.0 - frac( y - 0.5f );
+            weights[ 1 ][ 0 ] *= 1.0 - frac( y - 0.5f );
+            weights[ 0 ][ 1 ] *= frac( y - 0.5f );
+            weights[ 1 ][ 1 ] *= frac( y - 0.5f );
-            if (verbose)
-                log_info("\tfrac( x - 0.5f ) = %f,  frac( y - 0.5f ) = %f\n",
-                         frac(x - 0.5f), frac(y - 0.5f));
+            if( verbose )
+                log_info( "\tfrac( x - 0.5f ) = %f,  frac( y - 0.5f ) = %f\n",  frac( x - 0.5f ), frac( y - 0.5f ) );
-            for (int i = 0; i < 3; i++)
+            for( int i = 0; i < 3; i++ )
-                outData[i] = (float)((upLeft[i] * weights[0][0])
-                                     + (upRight[i] * weights[1][0])
-                                     + (lowLeft[i] * weights[0][1])
-                                     + (lowRight[i] * weights[1][1]));
+                outData[ i ] = (float)( ( upLeft[ i ] * weights[ 0 ][ 0 ] ) +
+                                        ( upRight[ i ] * weights[ 1 ][ 0 ] ) +
+                                        ( lowLeft[ i ] * weights[ 0 ][ 1 ] ) +
+                                        ( lowRight[ i ] * weights[ 1 ][ 1 ] ));
                 // flush subnormal results to zero if necessary
-                if (NULL == containsDenorms && fabs(outData[i]) < FLT_MIN)
-                    outData[i] = copysignf(0.0f, outData[i]);
+                if( NULL == containsDenorms && fabs(outData[i]) < FLT_MIN )
+                    outData[i] = copysignf( 0.0f, outData[i] );
-            outData[3] = (float)((upLeft[3] * weights[0][0])
-                                 + (upRight[3] * weights[1][0])
-                                 + (lowLeft[3] * weights[0][1])
-                                 + (lowRight[3] * weights[1][1]));
+            outData[ 3 ] = (float)( ( upLeft[ 3 ] * weights[ 0 ][ 0 ] ) +
+                                   ( upRight[ 3 ] * weights[ 1 ][ 0 ] ) +
+                                   ( lowLeft[ 3 ] * weights[ 0 ][ 1 ] ) +
+                                   ( lowRight[ 3 ] * weights[ 1 ][ 1 ] ));
             // flush subnormal results to zero if necessary
-            if (NULL == containsDenorms && fabs(outData[3]) < FLT_MIN)
-                outData[3] = copysignf(0.0f, outData[3]);
+            if( NULL == containsDenorms && fabs(outData[3]) < FLT_MIN )
+                outData[3] = copysignf( 0.0f, outData[3] );
             // 3D linear filtering
-            int x1 = adFn(floorf(x - 0.5f), width_lod);
-            int y1 = adFn(floorf(y - 0.5f), height_lod);
-            int z1 = adFn(floorf(z - 0.5f), depth_lod);
-            int x2 = adFn(floorf(x - 0.5f) + 1, width_lod);
-            int y2 = adFn(floorf(y - 0.5f) + 1, height_lod);
-            int z2 = adFn(floorf(z - 0.5f) + 1, depth_lod);
+            int x1 = adFn( floorf( x - 0.5f ), width_lod );
+            int y1 = adFn( floorf( y - 0.5f ), height_lod );
+            int z1 = adFn( floorf( z - 0.5f ), depth_lod );
+            int x2 = adFn( floorf( x - 0.5f ) + 1, width_lod );
+            int y2 = adFn( floorf( y - 0.5f ) + 1, height_lod );
+            int z2 = adFn( floorf( z - 0.5f ) + 1, depth_lod );
-            if (verbose)
-                log_info("\tActual integer coords used (i = floor(x-.5)): "
-                         "i0:{%d, %d, %d} and i1:{%d, %d, %d}\n",
-                         x1, y1, z1, x2, y2, z2);
+            if( verbose )
+                log_info( "\tActual integer coords used (i = floor(x-.5)): i0:{%d, %d, %d} and i1:{%d, %d, %d}\n", x1, y1, z1, x2, y2, z2 );
-            float upLeftA[4], upRightA[4], lowLeftA[4], lowRightA[4];
-            float upLeftB[4], upRightB[4], lowLeftB[4], lowRightB[4];
+            float upLeftA[ 4 ], upRightA[ 4 ], lowLeftA[ 4 ], lowRightA[ 4 ];
+            float upLeftB[ 4 ], upRightB[ 4 ], lowLeftB[ 4 ], lowRightB[ 4 ];
             float pixelMaxA[4], pixelMaxB[4];
-            read_image_pixel_float(imageData, imageInfo, x1, y1, z1, upLeftA,
-                                   lod);
-            read_image_pixel_float(imageData, imageInfo, x2, y1, z1, upRightA,
-                                   lod);
-            check_for_denorms(upLeftA, containsDenorms);
-            check_for_denorms(upRightA, containsDenorms);
-            pixelMax(upLeftA, upRightA, pixelMaxA);
-            read_image_pixel_float(imageData, imageInfo, x1, y2, z1, lowLeftA,
-                                   lod);
-            read_image_pixel_float(imageData, imageInfo, x2, y2, z1, lowRightA,
-                                   lod);
-            check_for_denorms(lowLeftA, containsDenorms);
-            check_for_denorms(lowRightA, containsDenorms);
-            pixelMax(lowLeftA, lowRightA, pixelMaxB);
-            pixelMax(pixelMaxA, pixelMaxB, returnVal.p);
-            read_image_pixel_float(imageData, imageInfo, x1, y1, z2, upLeftB,
-                                   lod);
-            read_image_pixel_float(imageData, imageInfo, x2, y1, z2, upRightB,
-                                   lod);
-            check_for_denorms(upLeftB, containsDenorms);
-            check_for_denorms(upRightB, containsDenorms);
-            pixelMax(upLeftB, upRightB, pixelMaxA);
-            read_image_pixel_float(imageData, imageInfo, x1, y2, z2, lowLeftB,
-                                   lod);
-            read_image_pixel_float(imageData, imageInfo, x2, y2, z2, lowRightB,
-                                   lod);
-            check_for_denorms(lowLeftB, containsDenorms);
-            check_for_denorms(lowRightB, containsDenorms);
-            pixelMax(lowLeftB, lowRightB, pixelMaxB);
-            pixelMax(pixelMaxA, pixelMaxB, pixelMaxA);
-            pixelMax(pixelMaxA, returnVal.p, returnVal.p);
+            read_image_pixel_float( imageData, imageInfo, x1, y1, z1, upLeftA, lod );
+            read_image_pixel_float( imageData, imageInfo, x2, y1, z1, upRightA, lod );
+            check_for_denorms( upLeftA, containsDenorms );
+            check_for_denorms( upRightA, containsDenorms );
+            pixelMax( upLeftA, upRightA, pixelMaxA );
+            read_image_pixel_float( imageData, imageInfo, x1, y2, z1, lowLeftA, lod );
+            read_image_pixel_float( imageData, imageInfo, x2, y2, z1, lowRightA, lod );
+            check_for_denorms( lowLeftA, containsDenorms );
+            check_for_denorms( lowRightA, containsDenorms );
+            pixelMax( lowLeftA, lowRightA, pixelMaxB );
+            pixelMax( pixelMaxA, pixelMaxB, returnVal.p);
+            read_image_pixel_float( imageData, imageInfo, x1, y1, z2, upLeftB, lod );
+            read_image_pixel_float( imageData, imageInfo, x2, y1, z2, upRightB, lod );
+            check_for_denorms( upLeftB, containsDenorms );
+            check_for_denorms( upRightB, containsDenorms );
+            pixelMax( upLeftB, upRightB, pixelMaxA );
+            read_image_pixel_float( imageData, imageInfo, x1, y2, z2, lowLeftB, lod );
+            read_image_pixel_float( imageData, imageInfo, x2, y2, z2, lowRightB, lod );
+            check_for_denorms( lowLeftB, containsDenorms );
+            check_for_denorms( lowRightB, containsDenorms );
+            pixelMax( lowLeftB, lowRightB, pixelMaxB );
+            pixelMax( pixelMaxA, pixelMaxB, pixelMaxA);
+            pixelMax( pixelMaxA, returnVal.p, returnVal.p );
-            if (verbose)
+            if( verbose )
-                if (NULL == containsDenorms)
-                    log_info("\tSampled pixels (rgba order, denorms flushed to "
-                             "zero):\n");
+                if( NULL == containsDenorms )
+                    log_info( "\tSampled pixels (rgba order, denorms flushed to zero):\n" );
-                    log_info("\tSampled pixels (rgba order):\n");
-                log_info("\t\tp000: %f, %f, %f, %f\n", upLeftA[0], upLeftA[1],
-                         upLeftA[2], upLeftA[3]);
-                log_info("\t\tp001: %f, %f, %f, %f\n", upRightA[0], upRightA[1],
-                         upRightA[2], upRightA[3]);
-                log_info("\t\tp010: %f, %f, %f, %f\n", lowLeftA[0], lowLeftA[1],
-                         lowLeftA[2], lowLeftA[3]);
-                log_info("\t\tp011: %f, %f, %f, %f\n\n", lowRightA[0],
-                         lowRightA[1], lowRightA[2], lowRightA[3]);
-                log_info("\t\tp100: %f, %f, %f, %f\n", upLeftB[0], upLeftB[1],
-                         upLeftB[2], upLeftB[3]);
-                log_info("\t\tp101: %f, %f, %f, %f\n", upRightB[0], upRightB[1],
-                         upRightB[2], upRightB[3]);
-                log_info("\t\tp110: %f, %f, %f, %f\n", lowLeftB[0], lowLeftB[1],
-                         lowLeftB[2], lowLeftB[3]);
-                log_info("\t\tp111: %f, %f, %f, %f\n", lowRightB[0],
-                         lowRightB[1], lowRightB[2], lowRightB[3]);
+                    log_info( "\tSampled pixels (rgba order):\n" );
+                log_info( "\t\tp000: %f, %f, %f, %f\n", upLeftA[0], upLeftA[1], upLeftA[2], upLeftA[3] );
+                log_info( "\t\tp001: %f, %f, %f, %f\n", upRightA[0], upRightA[1], upRightA[2], upRightA[3] );
+                log_info( "\t\tp010: %f, %f, %f, %f\n", lowLeftA[0], lowLeftA[1], lowLeftA[2], lowLeftA[3] );
+                log_info( "\t\tp011: %f, %f, %f, %f\n\n", lowRightA[0], lowRightA[1], lowRightA[2], lowRightA[3] );
+                log_info( "\t\tp100: %f, %f, %f, %f\n", upLeftB[0], upLeftB[1], upLeftB[2], upLeftB[3] );
+                log_info( "\t\tp101: %f, %f, %f, %f\n", upRightB[0], upRightB[1], upRightB[2], upRightB[3] );
+                log_info( "\t\tp110: %f, %f, %f, %f\n", lowLeftB[0], lowLeftB[1], lowLeftB[2], lowLeftB[3] );
+                log_info( "\t\tp111: %f, %f, %f, %f\n", lowRightB[0], lowRightB[1], lowRightB[2], lowRightB[3] );
-            double weights[2][2][2];
+            double weights[ 2 ][ 2 ][ 2 ];
-            float a = frac(x - 0.5f), b = frac(y - 0.5f), c = frac(z - 0.5f);
-            weights[0][0][0] = weights[0][1][0] = weights[0][0][1] =
-                weights[0][1][1] = 1.f - a;
-            weights[1][0][0] = weights[1][1][0] = weights[1][0][1] =
-                weights[1][1][1] = a;
-            weights[0][0][0] *= 1.f - b;
-            weights[1][0][0] *= 1.f - b;
-            weights[0][0][1] *= 1.f - b;
-            weights[1][0][1] *= 1.f - b;
-            weights[0][1][0] *= b;
-            weights[1][1][0] *= b;
-            weights[0][1][1] *= b;
-            weights[1][1][1] *= b;
-            weights[0][0][0] *= 1.f - c;
-            weights[0][1][0] *= 1.f - c;
-            weights[1][0][0] *= 1.f - c;
-            weights[1][1][0] *= 1.f - c;
-            weights[0][0][1] *= c;
-            weights[0][1][1] *= c;
-            weights[1][0][1] *= c;
-            weights[1][1][1] *= c;
+            float a = frac( x - 0.5f ), b = frac( y - 0.5f ), c = frac( z - 0.5f );
+            weights[ 0 ][ 0 ][ 0 ] = weights[ 0 ][ 1 ][ 0 ] = weights[ 0 ][ 0 ][ 1 ] = weights[ 0 ][ 1 ][ 1 ] = 1.f - a;
+            weights[ 1 ][ 0 ][ 0 ] = weights[ 1 ][ 1 ][ 0 ] = weights[ 1 ][ 0 ][ 1 ] = weights[ 1 ][ 1 ][ 1 ] = a;
+            weights[ 0 ][ 0 ][ 0 ] *= 1.f - b;
+            weights[ 1 ][ 0 ][ 0 ] *= 1.f - b;
+            weights[ 0 ][ 0 ][ 1 ] *= 1.f - b;
+            weights[ 1 ][ 0 ][ 1 ] *= 1.f - b;
+            weights[ 0 ][ 1 ][ 0 ] *= b;
+            weights[ 1 ][ 1 ][ 0 ] *= b;
+            weights[ 0 ][ 1 ][ 1 ] *= b;
+            weights[ 1 ][ 1 ][ 1 ] *= b;
+            weights[ 0 ][ 0 ][ 0 ] *= 1.f - c;
+            weights[ 0 ][ 1 ][ 0 ] *= 1.f - c;
+            weights[ 1 ][ 0 ][ 0 ] *= 1.f - c;
+            weights[ 1 ][ 1 ][ 0 ] *= 1.f - c;
+            weights[ 0 ][ 0 ][ 1 ] *= c;
+            weights[ 0 ][ 1 ][ 1 ] *= c;
+            weights[ 1 ][ 0 ][ 1 ] *= c;
+            weights[ 1 ][ 1 ][ 1 ] *= c;
-            if (verbose)
-                log_info("\tfrac( x - 0.5f ) = %f,  frac( y - 0.5f ) = %f, "
-                         "frac( z - 0.5f ) = %f\n",
-                         frac(x - 0.5f), frac(y - 0.5f), frac(z - 0.5f));
+            if( verbose )
+                log_info( "\tfrac( x - 0.5f ) = %f,  frac( y - 0.5f ) = %f, frac( z - 0.5f ) = %f\n",
+                         frac( x - 0.5f ), frac( y - 0.5f ), frac( z - 0.5f )  );
-            for (int i = 0; i < 3; i++)
+            for( int i = 0; i < 3; i++ )
-                outData[i] = (float)((upLeftA[i] * weights[0][0][0])
-                                     + (upRightA[i] * weights[1][0][0])
-                                     + (lowLeftA[i] * weights[0][1][0])
-                                     + (lowRightA[i] * weights[1][1][0])
-                                     + (upLeftB[i] * weights[0][0][1])
-                                     + (upRightB[i] * weights[1][0][1])
-                                     + (lowLeftB[i] * weights[0][1][1])
-                                     + (lowRightB[i] * weights[1][1][1]));
+                outData[ i ] = (float)( ( upLeftA[ i ] * weights[ 0 ][ 0 ][ 0 ] ) +
+                                        ( upRightA[ i ] * weights[ 1 ][ 0 ][ 0 ] ) +
+                                        ( lowLeftA[ i ] * weights[ 0 ][ 1 ][ 0 ] ) +
+                                        ( lowRightA[ i ] * weights[ 1 ][ 1 ][ 0 ] ) +
+                                        ( upLeftB[ i ] * weights[ 0 ][ 0 ][ 1 ] ) +
+                                        ( upRightB[ i ] * weights[ 1 ][ 0 ][ 1 ] ) +
+                                        ( lowLeftB[ i ] * weights[ 0 ][ 1 ][ 1 ] ) +
+                                        ( lowRightB[ i ] * weights[ 1 ][ 1 ][ 1 ] ));
                 // flush subnormal results to zero if necessary
-                if (NULL == containsDenorms && fabs(outData[i]) < FLT_MIN)
-                    outData[i] = copysignf(0.0f, outData[i]);
+                if( NULL == containsDenorms && fabs(outData[i]) < FLT_MIN )
+                    outData[i] = copysignf( 0.0f, outData[i] );
-            outData[3] = (float)((upLeftA[3] * weights[0][0][0])
-                                 + (upRightA[3] * weights[1][0][0])
-                                 + (lowLeftA[3] * weights[0][1][0])
-                                 + (lowRightA[3] * weights[1][1][0])
-                                 + (upLeftB[3] * weights[0][0][1])
-                                 + (upRightB[3] * weights[1][0][1])
-                                 + (lowLeftB[3] * weights[0][1][1])
-                                 + (lowRightB[3] * weights[1][1][1]));
+            outData[ 3 ] = (float)( ( upLeftA[ 3 ] * weights[ 0 ][ 0 ][ 0 ] ) +
+                                   ( upRightA[ 3 ] * weights[ 1 ][ 0 ][ 0 ] ) +
+                                   ( lowLeftA[ 3 ] * weights[ 0 ][ 1 ][ 0 ] ) +
+                                   ( lowRightA[ 3 ] * weights[ 1 ][ 1 ][ 0 ] ) +
+                                   ( upLeftB[ 3 ] * weights[ 0 ][ 0 ][ 1 ] ) +
+                                   ( upRightB[ 3 ] * weights[ 1 ][ 0 ][ 1 ] ) +
+                                   ( lowLeftB[ 3 ] * weights[ 0 ][ 1 ][ 1 ] ) +
+                                   ( lowRightB[ 3 ] * weights[ 1 ][ 1 ][ 1 ] ));
             // flush subnormal results to zero if necessary
-            if (NULL == containsDenorms && fabs(outData[3]) < FLT_MIN)
-                outData[3] = copysignf(0.0f, outData[3]);
+            if( NULL == containsDenorms && fabs(outData[3]) < FLT_MIN )
+                outData[3] = copysignf( 0.0f, outData[3] );
         return returnVal;
-FloatPixel sample_image_pixel_float_offset(
-    void *imageData, image_descriptor *imageInfo, float x, float y, float z,
-    float xAddressOffset, float yAddressOffset, float zAddressOffset,
-    image_sampler_data *imageSampler, float *outData, int verbose,
-    int *containsDenorms)
+FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
+                                           float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                           image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms )
-    return sample_image_pixel_float_offset(
-        imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset,
-        zAddressOffset, imageSampler, outData, verbose, containsDenorms, 0);
+  return sample_image_pixel_float_offset( imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset, zAddressOffset,
+    imageSampler, outData, verbose, containsDenorms, 0);
-int debug_find_vector_in_image(void *imagePtr, image_descriptor *imageInfo,
-                               void *vectorToFind, size_t vectorSize, int *outX,
-                               int *outY, int *outZ, size_t lod)
+int debug_find_vector_in_image( void *imagePtr, image_descriptor *imageInfo,
+                               void *vectorToFind, size_t vectorSize, int *outX, int *outY, int *outZ, size_t lod )
     int foundCount = 0;
     char *iPtr = (char *)imagePtr;
@@ -2305,159 +2315,151 @@
     switch (imageInfo->type)
-        case CL_MEM_OBJECT_IMAGE1D:
-            width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-            height = 1;
-            depth = 1;
-            break;
-            width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-            height = 1;
-            depth = imageInfo->arraySize;
-            break;
-        case CL_MEM_OBJECT_IMAGE2D:
-            width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-            height =
-                (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
-            depth = 1;
-            break;
-            width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-            height =
-                (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
-            depth = imageInfo->arraySize;
-            break;
-        case CL_MEM_OBJECT_IMAGE3D:
-            width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-            height =
-                (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
-            depth = (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
-            break;
+      width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
+      height = 1;
+      depth = 1;
+      break;
+      width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
+      height = 1;
+      depth = imageInfo->arraySize;
+      break;
+      width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
+      height = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
+      depth = 1;
+      break;
+      width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
+      height = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
+      depth = imageInfo->arraySize;
+      break;
+      width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
+      height = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
+      depth = (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
+      break;
-    row_pitch = width * get_pixel_size(imageInfo->format);
+    row_pitch = width * get_pixel_size( imageInfo->format );
     slice_pitch = row_pitch * height;
-    for (size_t z = 0; z < depth; z++)
+    for( size_t z = 0; z < depth; z++ )
-        for (size_t y = 0; y < height; y++)
+        for( size_t y = 0; y < height; y++ )
-            for (size_t x = 0; x < width; x++)
+            for( size_t x = 0; x < width; x++)
-                if (memcmp(iPtr, vectorToFind, vectorSize) == 0)
+                if( memcmp( iPtr, vectorToFind, vectorSize ) == 0 )
-                    if (foundCount == 0)
+                    if( foundCount == 0 )
                         *outX = (int)x;
-                        if (outY != NULL) *outY = (int)y;
-                        if (outZ != NULL) *outZ = (int)z;
+                        if (outY != NULL)
+                            *outY = (int)y;
+                        if( outZ != NULL )
+                            *outZ = (int)z;
                 iPtr += vectorSize;
-            iPtr += row_pitch - (width * vectorSize);
+            iPtr += row_pitch - ( width * vectorSize );
-        iPtr += slice_pitch - (height * row_pitch);
+        iPtr += slice_pitch - ( height * row_pitch );
     return foundCount;
-int debug_find_pixel_in_image(void *imagePtr, image_descriptor *imageInfo,
-                              unsigned int *valuesToFind, int *outX, int *outY,
-                              int *outZ, int lod)
+int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                              unsigned int *valuesToFind, int *outX, int *outY, int *outZ, int lod )
-    char vectorToFind[4 * 4];
-    size_t vectorSize = get_format_channel_count(imageInfo->format);
+    char vectorToFind[ 4 * 4 ];
+    size_t vectorSize = get_format_channel_count( imageInfo->format );
-    if (imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT8)
+    if( imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT8 )
         unsigned char *p = (unsigned char *)vectorToFind;
-        for (unsigned int i = 0; i < vectorSize; i++)
+        for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (unsigned char)valuesToFind[i];
-    else if (imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT16)
+    else if( imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT16 )
         unsigned short *p = (unsigned short *)vectorToFind;
-        for (unsigned int i = 0; i < vectorSize; i++)
+        for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (unsigned short)valuesToFind[i];
         vectorSize *= 2;
-    else if (imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT32)
+    else if( imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT32 )
         unsigned int *p = (unsigned int *)vectorToFind;
-        for (unsigned int i = 0; i < vectorSize; i++)
+        for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (unsigned int)valuesToFind[i];
         vectorSize *= 4;
-        log_info("WARNING: Unable to search for debug pixel: invalid image "
-                 "format\n");
+        log_info( "WARNING: Unable to search for debug pixel: invalid image format\n" );
         return false;
-    return debug_find_vector_in_image(imagePtr, imageInfo, vectorToFind,
-                                      vectorSize, outX, outY, outZ, lod);
+    return debug_find_vector_in_image( imagePtr, imageInfo, vectorToFind, vectorSize, outX, outY, outZ, lod );
-int debug_find_pixel_in_image(void *imagePtr, image_descriptor *imageInfo,
-                              int *valuesToFind, int *outX, int *outY,
-                              int *outZ, int lod)
+int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                              int *valuesToFind, int *outX, int *outY, int *outZ, int lod )
-    char vectorToFind[4 * 4];
-    size_t vectorSize = get_format_channel_count(imageInfo->format);
+    char vectorToFind[ 4 * 4 ];
+    size_t vectorSize = get_format_channel_count( imageInfo->format );
-    if (imageInfo->format->image_channel_data_type == CL_SIGNED_INT8)
+    if( imageInfo->format->image_channel_data_type == CL_SIGNED_INT8 )
         char *p = (char *)vectorToFind;
-        for (unsigned int i = 0; i < vectorSize; i++)
+        for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (char)valuesToFind[i];
-    else if (imageInfo->format->image_channel_data_type == CL_SIGNED_INT16)
+    else if( imageInfo->format->image_channel_data_type == CL_SIGNED_INT16 )
         short *p = (short *)vectorToFind;
-        for (unsigned int i = 0; i < vectorSize; i++)
+        for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (short)valuesToFind[i];
         vectorSize *= 2;
-    else if (imageInfo->format->image_channel_data_type == CL_SIGNED_INT32)
+    else if( imageInfo->format->image_channel_data_type == CL_SIGNED_INT32 )
         int *p = (int *)vectorToFind;
-        for (unsigned int i = 0; i < vectorSize; i++)
+        for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (int)valuesToFind[i];
         vectorSize *= 4;
-        log_info("WARNING: Unable to search for debug pixel: invalid image "
-                 "format\n");
+        log_info( "WARNING: Unable to search for debug pixel: invalid image format\n" );
         return false;
-    return debug_find_vector_in_image(imagePtr, imageInfo, vectorToFind,
-                                      vectorSize, outX, outY, outZ, lod);
+    return debug_find_vector_in_image( imagePtr, imageInfo, vectorToFind, vectorSize, outX, outY, outZ, lod );
-int debug_find_pixel_in_image(void *imagePtr, image_descriptor *imageInfo,
-                              float *valuesToFind, int *outX, int *outY,
-                              int *outZ, int lod)
+int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                              float *valuesToFind, int *outX, int *outY, int *outZ, int lod )
-    char vectorToFind[4 * 4];
+    char vectorToFind[ 4 * 4 ];
     float swizzled[4];
-    memcpy(swizzled, valuesToFind, sizeof(swizzled));
-    size_t vectorSize = get_pixel_size(imageInfo->format);
-    pack_image_pixel(swizzled, imageInfo->format, vectorToFind);
-    return debug_find_vector_in_image(imagePtr, imageInfo, vectorToFind,
-                                      vectorSize, outX, outY, outZ, lod);
+    memcpy( swizzled, valuesToFind, sizeof( swizzled ) );
+    size_t vectorSize = get_pixel_size( imageInfo->format );
+    pack_image_pixel( swizzled, imageInfo->format, vectorToFind );
+    return debug_find_vector_in_image( imagePtr, imageInfo, vectorToFind, vectorSize, outX, outY, outZ, lod );
-template <class T>
-void swizzle_vector_for_image(T *srcVector, const cl_image_format *imageFormat)
+template <class T> void swizzle_vector_for_image( T *srcVector, const cl_image_format *imageFormat )
     T temp;
-    switch (imageFormat->image_channel_order)
+    switch( imageFormat->image_channel_order )
-        case CL_A: srcVector[0] = srcVector[3]; break;
+        case CL_A:
+            srcVector[ 0 ] = srcVector[ 3 ];
+            break;
         case CL_R:
         case CL_Rx:
         case CL_RG:
@@ -2467,475 +2469,461 @@
         case CL_RGBA:
         case CL_sRGB:
         case CL_sRGBx:
-        case CL_sRGBA: break;
-        case CL_RA: srcVector[1] = srcVector[3]; break;
-        case CL_ARGB:
-            temp = srcVector[3];
-            srcVector[3] = srcVector[2];
-            srcVector[2] = srcVector[1];
-            srcVector[1] = srcVector[0];
-            srcVector[0] = temp;
+        case CL_sRGBA:
-        case CL_ABGR:
-            temp = srcVector[3];
-            srcVector[3] = srcVector[0];
-            srcVector[0] = temp;
-            temp = srcVector[2];
-            srcVector[2] = srcVector[1];
-            srcVector[1] = temp;
+        case CL_RA:
+            srcVector[ 1 ] = srcVector[ 3 ];
+            break;
+        case CL_ARGB:
+            temp = srcVector[ 3 ];
+            srcVector[ 3 ] = srcVector[ 2 ];
+            srcVector[ 2 ] = srcVector[ 1 ];
+            srcVector[ 1 ] = srcVector[ 0 ];
+            srcVector[ 0 ] = temp;
         case CL_BGRA:
         case CL_sBGRA:
-            temp = srcVector[0];
-            srcVector[0] = srcVector[2];
-            srcVector[2] = temp;
+            temp = srcVector[ 0 ];
+            srcVector[ 0 ] = srcVector[ 2 ];
+            srcVector[ 2 ] = temp;
         case CL_INTENSITY:
-            srcVector[3] = srcVector[0];
-            srcVector[2] = srcVector[0];
-            srcVector[1] = srcVector[0];
+            srcVector[ 3 ] = srcVector[ 0 ];
+            srcVector[ 2 ] = srcVector[ 0 ];
+            srcVector[ 1 ] = srcVector[ 0 ];
         case CL_LUMINANCE:
-            srcVector[2] = srcVector[0];
-            srcVector[1] = srcVector[0];
+            srcVector[ 2 ] = srcVector[ 0 ];
+            srcVector[ 1 ] = srcVector[ 0 ];
 #ifdef CL_1RGB_APPLE
         case CL_1RGB_APPLE:
-            temp = srcVector[3];
-            srcVector[3] = srcVector[2];
-            srcVector[2] = srcVector[1];
-            srcVector[1] = srcVector[0];
-            srcVector[0] = temp;
+            temp = srcVector[ 3 ];
+            srcVector[ 3 ] = srcVector[ 2 ];
+            srcVector[ 2 ] = srcVector[ 1 ];
+            srcVector[ 1 ] = srcVector[ 0 ];
+            srcVector[ 0 ] = temp;
 #ifdef CL_BGR1_APPLE
         case CL_BGR1_APPLE:
-            temp = srcVector[0];
-            srcVector[0] = srcVector[2];
-            srcVector[2] = temp;
+            temp = srcVector[ 0 ];
+            srcVector[ 0 ] = srcVector[ 2 ];
+            srcVector[ 2 ] = temp;
-#define SATURATE(v, min, max) (v < min ? min : (v > max ? max : v))
+#define SATURATE( v, min, max ) ( v < min ? min : ( v > max ? max : v ) )
-void pack_image_pixel(unsigned int *srcVector,
-                      const cl_image_format *imageFormat, void *outData)
+void pack_image_pixel( unsigned int *srcVector, const cl_image_format *imageFormat, void *outData )
-    swizzle_vector_for_image<unsigned int>(srcVector, imageFormat);
-    size_t channelCount = get_format_channel_count(imageFormat);
+    swizzle_vector_for_image<unsigned int>( srcVector, imageFormat );
+    size_t channelCount = get_format_channel_count( imageFormat );
-    switch (imageFormat->image_channel_data_type)
+    switch( imageFormat->image_channel_data_type )
-        case CL_UNSIGNED_INT8: {
+        case CL_UNSIGNED_INT8:
+        {
             unsigned char *ptr = (unsigned char *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = (unsigned char)SATURATE(srcVector[i], 0, 255);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (unsigned char)SATURATE( srcVector[ i ], 0, 255 );
-        case CL_UNSIGNED_INT16: {
+        case CL_UNSIGNED_INT16:
+        {
             unsigned short *ptr = (unsigned short *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = (unsigned short)SATURATE(srcVector[i], 0, 65535);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (unsigned short)SATURATE( srcVector[ i ], 0, 65535 );
-        case CL_UNSIGNED_INT32: {
+        case CL_UNSIGNED_INT32:
+        {
             unsigned int *ptr = (unsigned int *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = (unsigned int)srcVector[i];
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (unsigned int)srcVector[ i ];
-        default: break;
+        default:
+            break;
-void pack_image_pixel(int *srcVector, const cl_image_format *imageFormat,
-                      void *outData)
+void pack_image_pixel( int *srcVector, const cl_image_format *imageFormat, void *outData )
-    swizzle_vector_for_image<int>(srcVector, imageFormat);
-    size_t chanelCount = get_format_channel_count(imageFormat);
+    swizzle_vector_for_image<int>( srcVector, imageFormat );
+    size_t chanelCount = get_format_channel_count( imageFormat );
-    switch (imageFormat->image_channel_data_type)
+    switch( imageFormat->image_channel_data_type )
-        case CL_SIGNED_INT8: {
+        case CL_SIGNED_INT8:
+        {
             char *ptr = (char *)outData;
-            for (unsigned int i = 0; i < chanelCount; i++)
-                ptr[i] = (char)SATURATE(srcVector[i], -128, 127);
+            for( unsigned int i = 0; i < chanelCount; i++ )
+                ptr[ i ] = (char)SATURATE( srcVector[ i ], -128, 127 );
-        case CL_SIGNED_INT16: {
+        case CL_SIGNED_INT16:
+        {
             short *ptr = (short *)outData;
-            for (unsigned int i = 0; i < chanelCount; i++)
-                ptr[i] = (short)SATURATE(srcVector[i], -32768, 32767);
+            for( unsigned int i = 0; i < chanelCount; i++ )
+                ptr[ i ] = (short)SATURATE( srcVector[ i ], -32768, 32767 );
-        case CL_SIGNED_INT32: {
+        case CL_SIGNED_INT32:
+        {
             int *ptr = (int *)outData;
-            for (unsigned int i = 0; i < chanelCount; i++)
-                ptr[i] = (int)srcVector[i];
+            for( unsigned int i = 0; i < chanelCount; i++ )
+                ptr[ i ] = (int)srcVector[ i ];
-        default: break;
+        default:
+            break;
-int round_to_even(float v)
+int round_to_even( float v )
     // clamp overflow
-    if (v >= -(float)INT_MIN) return INT_MAX;
-    if (v <= (float)INT_MIN) return INT_MIN;
+    if( v >= - (float) INT_MIN )
+        return INT_MAX;
+    if( v <= (float) INT_MIN )
+        return INT_MIN;
     // round fractional values to integer value
-    if (fabsf(v) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23))
+    if( fabsf(v) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23) )
-        static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23),
-                                        MAKE_HEX_FLOAT(-0x1.0p23f, -0x1L, 23) };
-        float magicVal = magic[v < 0.0f];
+        static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23), MAKE_HEX_FLOAT(-0x1.0p23f, -0x1L, 23) };
+        float magicVal = magic[ v < 0.0f ];
         v += magicVal;
         v -= magicVal;
-    return (int)v;
+    return (int) v;
-void pack_image_pixel(float *srcVector, const cl_image_format *imageFormat,
-                      void *outData)
+void pack_image_pixel( float *srcVector, const cl_image_format *imageFormat, void *outData )
-    swizzle_vector_for_image<float>(srcVector, imageFormat);
-    size_t channelCount = get_format_channel_count(imageFormat);
-    switch (imageFormat->image_channel_data_type)
+    swizzle_vector_for_image<float>( srcVector, imageFormat );
+    size_t channelCount = get_format_channel_count( imageFormat );
+    switch( imageFormat->image_channel_data_type )
-        case CL_HALF_FLOAT: {
-            cl_half *ptr = (cl_half *)outData;
+        case CL_HALF_FLOAT:
+        {
+            cl_ushort *ptr = (cl_ushort *)outData;
-            switch (gFloatToHalfRoundingMode)
+            switch( gFloatToHalfRoundingMode )
                 case kRoundToNearestEven:
-                    for (unsigned int i = 0; i < channelCount; i++)
-                        ptr[i] = cl_half_from_float(srcVector[i], CL_HALF_RTE);
-                    break;
+            for( unsigned int i = 0; i < channelCount; i++ )
+                        ptr[ i ] = float2half_rte( srcVector[ i ] );
+            break;
                 case kRoundTowardZero:
-                    for (unsigned int i = 0; i < channelCount; i++)
-                        ptr[i] = cl_half_from_float(srcVector[i], CL_HALF_RTZ);
+                    for( unsigned int i = 0; i < channelCount; i++ )
+                        ptr[ i ] = float2half_rtz( srcVector[ i ] );
-                    log_error("ERROR: Test internal error -- unhandled or "
-                              "unknown float->half rounding mode.\n");
+                    log_error( "ERROR: Test internal error -- unhandled or unknown float->half rounding mode.\n" );
-            }
+        }
-        case CL_FLOAT: {
+        case CL_FLOAT:
+        {
             cl_float *ptr = (cl_float *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = srcVector[i];
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = srcVector[ i ];
-        case CL_SNORM_INT8: {
+        case CL_SNORM_INT8:
+        {
             cl_char *ptr = (cl_char *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] =
-                    (cl_char)NORMALIZE_SIGNED(srcVector[i], -127.0f, 127.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (cl_char)NORMALIZE_SIGNED( srcVector[ i ], -127.0f, 127.f );
-        case CL_SNORM_INT16: {
+        case CL_SNORM_INT16:
+        {
             cl_short *ptr = (cl_short *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] =
-                    (short)NORMALIZE_SIGNED(srcVector[i], -32767.f, 32767.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (short)NORMALIZE_SIGNED( srcVector[ i ], -32767.f, 32767.f  );
-        case CL_UNORM_INT8: {
+        case CL_UNORM_INT8:
+        {
             cl_uchar *ptr = (cl_uchar *)outData;
-            if (is_sRGBA_order(imageFormat->image_channel_order))
+            if ( is_sRGBA_order(imageFormat->image_channel_order) )
-                ptr[0] = (unsigned char)(sRGBmap(srcVector[0]) + 0.5);
-                ptr[1] = (unsigned char)(sRGBmap(srcVector[1]) + 0.5);
-                ptr[2] = (unsigned char)(sRGBmap(srcVector[2]) + 0.5);
+                ptr[ 0 ] = (unsigned char)( sRGBmap( srcVector[ 0 ] ) + 0.5 );
+                ptr[ 1 ] = (unsigned char)( sRGBmap( srcVector[ 1 ] ) + 0.5 );
+                ptr[ 2 ] = (unsigned char)( sRGBmap( srcVector[ 2 ] ) + 0.5 );
                 if (channelCount == 4)
-                    ptr[3] = (unsigned char)NORMALIZE(srcVector[3], 255.f);
+                    ptr[ 3 ] = (unsigned char)NORMALIZE( srcVector[ 3 ], 255.f );
-                for (unsigned int i = 0; i < channelCount; i++)
-                    ptr[i] = (unsigned char)NORMALIZE(srcVector[i], 255.f);
+                for( unsigned int i = 0; i < channelCount; i++ )
+                    ptr[ i ] = (unsigned char)NORMALIZE( srcVector[ i ], 255.f );
 #ifdef CL_1RGB_APPLE
-            if (imageFormat->image_channel_order == CL_1RGB_APPLE)
+            if( imageFormat->image_channel_order == CL_1RGB_APPLE )
                 ptr[0] = 255.0f;
 #ifdef CL_BGR1_APPLE
-            if (imageFormat->image_channel_order == CL_BGR1_APPLE)
+            if( imageFormat->image_channel_order == CL_BGR1_APPLE )
                 ptr[3] = 255.0f;
-        case CL_UNORM_INT16: {
+        case CL_UNORM_INT16:
+        {
             cl_ushort *ptr = (cl_ushort *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = (unsigned short)NORMALIZE(srcVector[i], 65535.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (unsigned short)NORMALIZE( srcVector[ i ], 65535.f );
-        case CL_UNORM_SHORT_555: {
+        case CL_UNORM_SHORT_555:
+        {
             cl_ushort *ptr = (cl_ushort *)outData;
-            ptr[0] =
-                (((unsigned short)NORMALIZE(srcVector[0], 31.f) & 31) << 10)
-                | (((unsigned short)NORMALIZE(srcVector[1], 31.f) & 31) << 5)
-                | (((unsigned short)NORMALIZE(srcVector[2], 31.f) & 31) << 0);
+            ptr[ 0 ] = ( ( (unsigned short)NORMALIZE( srcVector[ 0 ], 31.f ) & 31 ) << 10 ) |
+            ( ( (unsigned short)NORMALIZE( srcVector[ 1 ], 31.f ) & 31 ) << 5 ) |
+            ( ( (unsigned short)NORMALIZE( srcVector[ 2 ], 31.f ) & 31 ) << 0 );
-        case CL_UNORM_SHORT_565: {
+        case CL_UNORM_SHORT_565:
+        {
             cl_ushort *ptr = (cl_ushort *)outData;
-            ptr[0] =
-                (((unsigned short)NORMALIZE(srcVector[0], 31.f) & 31) << 11)
-                | (((unsigned short)NORMALIZE(srcVector[1], 63.f) & 63) << 5)
-                | (((unsigned short)NORMALIZE(srcVector[2], 31.f) & 31) << 0);
+            ptr[ 0 ] = ( ( (unsigned short)NORMALIZE( srcVector[ 0 ], 31.f ) & 31 ) << 11 ) |
+            ( ( (unsigned short)NORMALIZE( srcVector[ 1 ], 63.f ) & 63 ) << 5 ) |
+            ( ( (unsigned short)NORMALIZE( srcVector[ 2 ], 31.f ) & 31 ) << 0 );
-        case CL_UNORM_INT_101010: {
+        case CL_UNORM_INT_101010:
+        {
             cl_uint *ptr = (cl_uint *)outData;
-            ptr[0] =
-                (((unsigned int)NORMALIZE(srcVector[0], 1023.f) & 1023) << 20)
-                | (((unsigned int)NORMALIZE(srcVector[1], 1023.f) & 1023) << 10)
-                | (((unsigned int)NORMALIZE(srcVector[2], 1023.f) & 1023) << 0);
+            ptr[ 0 ] = ( ( (unsigned int)NORMALIZE( srcVector[ 0 ], 1023.f ) & 1023 ) << 20 ) |
+            ( ( (unsigned int)NORMALIZE( srcVector[ 1 ], 1023.f ) & 1023 ) << 10 ) |
+            ( ( (unsigned int)NORMALIZE( srcVector[ 2 ], 1023.f ) & 1023 ) << 0 );
-        case CL_SIGNED_INT8: {
+        case CL_SIGNED_INT8:
+        {
             cl_char *ptr = (cl_char *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] =
-                    (cl_char)CONVERT_INT(srcVector[i], -127.0f, 127.f, 127);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (cl_char)CONVERT_INT( srcVector[ i ], -127.0f, 127.f, 127 );
-        case CL_SIGNED_INT16: {
+        case CL_SIGNED_INT16:
+        {
             cl_short *ptr = (cl_short *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] =
-                    (short)CONVERT_INT(srcVector[i], -32767.f, 32767.f, 32767);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (short)CONVERT_INT( srcVector[ i ], -32767.f, 32767.f, 32767  );
-        case CL_SIGNED_INT32: {
+        case CL_SIGNED_INT32:
+        {
             cl_int *ptr = (cl_int *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = (int)CONVERT_INT(
-                    srcVector[i], MAKE_HEX_FLOAT(-0x1.0p31f, -1, 31),
-                    MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffe, 30 - 23),
-                    CL_INT_MAX);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (int)CONVERT_INT( srcVector[ i ], MAKE_HEX_FLOAT( -0x1.0p31f, -1, 31), MAKE_HEX_FLOAT( 0x1.fffffep30f, 0x1fffffe, 30-23), CL_INT_MAX  );
-        case CL_UNSIGNED_INT8: {
+        case CL_UNSIGNED_INT8:
+        {
             cl_uchar *ptr = (cl_uchar *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] =
-                    (cl_uchar)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (cl_uchar)CONVERT_UINT( srcVector[ i ], 255.f, CL_UCHAR_MAX );
-        case CL_UNSIGNED_INT16: {
+        case CL_UNSIGNED_INT16:
+        {
             cl_ushort *ptr = (cl_ushort *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = (cl_ushort)CONVERT_UINT(srcVector[i], 32767.f,
-                                                 CL_USHRT_MAX);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (cl_ushort)CONVERT_UINT( srcVector[ i ], 32767.f, CL_USHRT_MAX );
-        case CL_UNSIGNED_INT32: {
+        case CL_UNSIGNED_INT32:
+        {
             cl_uint *ptr = (cl_uint *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
-                ptr[i] = (cl_uint)CONVERT_UINT(
-                    srcVector[i],
-                    MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffe, 31 - 23),
-                    CL_UINT_MAX);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                ptr[ i ] = (cl_uint)CONVERT_UINT( srcVector[ i ], MAKE_HEX_FLOAT( 0x1.fffffep31f, 0x1fffffe, 31-23), CL_UINT_MAX  );
-        case CL_SFIXED14_APPLE: {
-            cl_ushort *ptr = (cl_ushort *)outData;
-            for (unsigned int i = 0; i < channelCount; i++)
+        case CL_SFIXED14_APPLE:
+        {
+            cl_ushort *ptr = (cl_ushort*)outData;
+            for( unsigned int i = 0; i < channelCount; i++ )
-                cl_float f = fmaxf(srcVector[i], -1.0f);
-                f = fminf(f, 3.0f);
+                cl_float f = fmaxf( srcVector[i], -1.0f );
+                f = fminf( f, 3.0f );
                 cl_int d = rintf(f * 0x1.0p14f);
                 d += 16384;
-                if (d > CL_USHRT_MAX) d = CL_USHRT_MAX;
+                if( d > CL_USHRT_MAX )
+                    d = CL_USHRT_MAX;
                 ptr[i] = d;
-            log_error("INTERNAL ERROR: unknown format (%d)\n",
-                      imageFormat->image_channel_data_type);
+            log_error( "INTERNAL ERROR: unknown format (%d)\n", imageFormat->image_channel_data_type);
-void pack_image_pixel_error(const float *srcVector,
-                            const cl_image_format *imageFormat,
-                            const void *results, float *errors)
+void pack_image_pixel_error( const float *srcVector, const cl_image_format *imageFormat, const void *results, float *errors )
-    size_t channelCount = get_format_channel_count(imageFormat);
-    switch (imageFormat->image_channel_data_type)
+    size_t channelCount = get_format_channel_count( imageFormat );
+    switch( imageFormat->image_channel_data_type )
-        case CL_HALF_FLOAT: {
-            const cl_half *ptr = (const cl_half *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = Ulp_Error_Half(ptr[i], srcVector[i]);
-            break;
-        }
-        case CL_FLOAT: {
+        case CL_HALF_FLOAT:
+        {
             const cl_ushort *ptr = (const cl_ushort *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = Ulp_Error(ptr[i], srcVector[i]);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = Ulp_Error_Half( ptr[i], srcVector[i] );
-        case CL_SNORM_INT8: {
+        case CL_FLOAT:
+        {
+            const cl_ushort *ptr = (const cl_ushort *)results;
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = Ulp_Error( ptr[i], srcVector[i] );
+            break;
+        }
+        case CL_SNORM_INT8:
+        {
             const cl_char *ptr = (const cl_char *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = ptr[i]
-                    - NORMALIZE_SIGNED_UNROUNDED(srcVector[i], -127.0f, 127.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = ptr[i] - NORMALIZE_SIGNED_UNROUNDED( srcVector[ i ], -127.0f, 127.f );
-        case CL_SNORM_INT16: {
+        case CL_SNORM_INT16:
+        {
             const cl_short *ptr = (const cl_short *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = ptr[i]
-                    - NORMALIZE_SIGNED_UNROUNDED(srcVector[i], -32767.f,
-                                                 32767.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = ptr[i] - NORMALIZE_SIGNED_UNROUNDED( srcVector[ i ], -32767.f, 32767.f  );
-        case CL_UNORM_INT8: {
+        case CL_UNORM_INT8:
+        {
             const cl_uchar *ptr = (const cl_uchar *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = ptr[i] - NORMALIZE_UNROUNDED(srcVector[i], 255.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = ptr[i] - NORMALIZE_UNROUNDED( srcVector[ i ], 255.f  );
-        case CL_UNORM_INT16: {
+        case CL_UNORM_INT16:
+        {
             const cl_ushort *ptr = (const cl_ushort *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = ptr[i] - NORMALIZE_UNROUNDED(srcVector[i], 65535.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = ptr[i] - NORMALIZE_UNROUNDED( srcVector[ i ], 65535.f  );
-        case CL_UNORM_SHORT_555: {
+        case CL_UNORM_SHORT_555:
+        {
             const cl_ushort *ptr = (const cl_ushort *)results;
-            errors[0] =
-                ((ptr[0] >> 10) & 31) - NORMALIZE_UNROUNDED(srcVector[0], 31.f);
-            errors[1] =
-                ((ptr[0] >> 5) & 31) - NORMALIZE_UNROUNDED(srcVector[1], 31.f);
-            errors[2] =
-                ((ptr[0] >> 0) & 31) - NORMALIZE_UNROUNDED(srcVector[2], 31.f);
+            errors[0] = ((ptr[0] >> 10) & 31) - NORMALIZE_UNROUNDED( srcVector[ 0 ], 31.f );
+            errors[1] = ((ptr[0] >>  5) & 31) - NORMALIZE_UNROUNDED( srcVector[ 1 ], 31.f );
+            errors[2] = ((ptr[0] >>  0) & 31) - NORMALIZE_UNROUNDED( srcVector[ 2 ], 31.f );
-        case CL_UNORM_SHORT_565: {
+        case CL_UNORM_SHORT_565:
+        {
             const cl_ushort *ptr = (const cl_ushort *)results;
-            errors[0] =
-                ((ptr[0] >> 11) & 31) - NORMALIZE_UNROUNDED(srcVector[0], 31.f);
-            errors[1] =
-                ((ptr[0] >> 5) & 63) - NORMALIZE_UNROUNDED(srcVector[1], 63.f);
-            errors[2] =
-                ((ptr[0] >> 0) & 31) - NORMALIZE_UNROUNDED(srcVector[2], 31.f);
+            errors[0] = ((ptr[0] >> 11) & 31) - NORMALIZE_UNROUNDED( srcVector[ 0 ], 31.f );
+            errors[1] = ((ptr[0] >>  5) & 63) - NORMALIZE_UNROUNDED( srcVector[ 1 ], 63.f );
+            errors[2] = ((ptr[0] >>  0) & 31) - NORMALIZE_UNROUNDED( srcVector[ 2 ], 31.f );
-        case CL_UNORM_INT_101010: {
+        case CL_UNORM_INT_101010:
+        {
             const cl_uint *ptr = (const cl_uint *)results;
-            errors[0] = ((ptr[0] >> 20) & 1023)
-                - NORMALIZE_UNROUNDED(srcVector[0], 1023.f);
-            errors[1] = ((ptr[0] >> 10) & 1023)
-                - NORMALIZE_UNROUNDED(srcVector[1], 1023.f);
-            errors[2] = ((ptr[0] >> 0) & 1023)
-                - NORMALIZE_UNROUNDED(srcVector[2], 1023.f);
+            errors[0] = ((ptr[0] >> 20) & 1023) - NORMALIZE_UNROUNDED( srcVector[ 0 ], 1023.f );
+            errors[1] = ((ptr[0] >> 10) & 1023) - NORMALIZE_UNROUNDED( srcVector[ 1 ], 1023.f );
+            errors[2] = ((ptr[0] >>  0) & 1023) - NORMALIZE_UNROUNDED( srcVector[ 2 ], 1023.f );
-        case CL_SIGNED_INT8: {
+        case CL_SIGNED_INT8:
+        {
             const cl_char *ptr = (const cl_char *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] =
-                    ptr[i] - CONVERT_INT(srcVector[i], -127.0f, 127.f, 127);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[ i ] = ptr[i] - CONVERT_INT( srcVector[ i ], -127.0f, 127.f, 127 );
-        case CL_SIGNED_INT16: {
+        case CL_SIGNED_INT16:
+        {
             const cl_short *ptr = (const cl_short *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = ptr[i]
-                    - CONVERT_INT(srcVector[i], -32767.f, 32767.f, 32767);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = ptr[ i ] - CONVERT_INT( srcVector[ i ], -32767.f, 32767.f, 32767  );
-        case CL_SIGNED_INT32: {
+        case CL_SIGNED_INT32:
+        {
             const cl_int *ptr = (const cl_int *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = (cl_float)(
-                    (cl_long)ptr[i]
-                    - (cl_long)CONVERT_INT(
-                        srcVector[i], MAKE_HEX_FLOAT(-0x1.0p31f, -1, 31),
-                        MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffe, 30 - 23),
-                        CL_INT_MAX));
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = (cl_float)((cl_long) ptr[ i ] - (cl_long) CONVERT_INT( srcVector[ i ], MAKE_HEX_FLOAT( -0x1.0p31f, -1, 31), MAKE_HEX_FLOAT( 0x1.fffffep30f, 0x1fffffe, 30-23), CL_INT_MAX  ));
-        case CL_UNSIGNED_INT8: {
+        case CL_UNSIGNED_INT8:
+        {
             const cl_uchar *ptr = (const cl_uchar *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = (cl_int)ptr[i]
-                    - (cl_int)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = (cl_int) ptr[ i ] - (cl_int) CONVERT_UINT( srcVector[ i ], 255.f, CL_UCHAR_MAX );
-        case CL_UNSIGNED_INT16: {
+        case CL_UNSIGNED_INT16:
+        {
             const cl_ushort *ptr = (const cl_ushort *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = (cl_int)ptr[i]
-                    - (cl_int)CONVERT_UINT(srcVector[i], 32767.f, CL_USHRT_MAX);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = (cl_int) ptr[ i ] - (cl_int) CONVERT_UINT( srcVector[ i ], 32767.f, CL_USHRT_MAX );
-        case CL_UNSIGNED_INT32: {
+        case CL_UNSIGNED_INT32:
+        {
             const cl_uint *ptr = (const cl_uint *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = (cl_float)(
-                    (cl_long)ptr[i]
-                    - (cl_long)CONVERT_UINT(
-                        srcVector[i],
-                        MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffe, 31 - 23),
-                        CL_UINT_MAX));
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = (cl_float)((cl_long) ptr[ i ] - (cl_long)CONVERT_UINT( srcVector[ i ], MAKE_HEX_FLOAT( 0x1.fffffep31f, 0x1fffffe, 31-23), CL_UINT_MAX  ));
-        case CL_SFIXED14_APPLE: {
+        case CL_SFIXED14_APPLE:
+        {
             const cl_ushort *ptr = (const cl_ushort *)results;
-            for (unsigned int i = 0; i < channelCount; i++)
-                errors[i] = ptr[i]
-                    - NORMALIZE_SIGNED_UNROUNDED(((int)srcVector[i] - 16384),
-                                                 -16384.f, 49151.f);
+            for( unsigned int i = 0; i < channelCount; i++ )
+                errors[i] = ptr[i] - NORMALIZE_SIGNED_UNROUNDED( ((int) srcVector[ i ] - 16384), -16384.f, 49151.f  );
-            log_error("INTERNAL ERROR: unknown format (%d)\n",
-                      imageFormat->image_channel_data_type);
+            log_error( "INTERNAL ERROR: unknown format (%d)\n", imageFormat->image_channel_data_type);
@@ -2944,238 +2932,208 @@
 //  Autodetect which rounding mode is used for image writes to CL_HALF_FLOAT
-//  This should be called lazily before attempting to verify image writes,
-//  otherwise an error will occur.
+//  This should be called lazily before attempting to verify image writes, otherwise an error will occur.
-int DetectFloatToHalfRoundingMode(
-    cl_command_queue q) // Returns CL_SUCCESS on success
+int  DetectFloatToHalfRoundingMode( cl_command_queue q )  // Returns CL_SUCCESS on success
     cl_int err = CL_SUCCESS;
-    if (gFloatToHalfRoundingMode == kDefaultRoundingMode)
+    if( gFloatToHalfRoundingMode == kDefaultRoundingMode )
-        // Some numbers near 0.5f, that we look at to see how the values are
-        // rounded.
-        static const cl_uint inData[4 * 4] = {
-            0x3f000fffU, 0x3f001000U, 0x3f001001U, 0U,
-            0x3f001fffU, 0x3f002000U, 0x3f002001U, 0U,
-            0x3f002fffU, 0x3f003000U, 0x3f003001U, 0U,
-            0x3f003fffU, 0x3f004000U, 0x3f004001U, 0U
-        };
-        static const size_t count = sizeof(inData) / (4 * sizeof(inData[0]));
-        const float *inp = (const float *)inData;
+        // Some numbers near 0.5f, that we look at to see how the values are rounded.
+        static const cl_uint  inData[4*4] = {   0x3f000fffU, 0x3f001000U, 0x3f001001U, 0U, 0x3f001fffU, 0x3f002000U, 0x3f002001U, 0U,
+                                                0x3f002fffU, 0x3f003000U, 0x3f003001U, 0U, 0x3f003fffU, 0x3f004000U, 0x3f004001U, 0U    };
+        static const size_t count = sizeof( inData ) / (4*sizeof( inData[0] ));
+        const float *inp = (const float*) inData;
         cl_context context = NULL;
-        // Create an input buffer
-        err = clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(context),
-                                    &context, NULL);
-        if (err)
+    // Create an input buffer
+        err = clGetCommandQueueInfo( q, CL_QUEUE_CONTEXT, sizeof(context), &context, NULL );
+        if( err )
-            log_error("Error:  could not get context from command queue in "
-                      "DetectFloatToHalfRoundingMode  (%d)",
-                      err);
+            log_error( "Error:  could not get context from command queue in DetectFloatToHalfRoundingMode  (%d)", err );
             return err;
-        cl_mem inBuf = clCreateBuffer(context,
-                                      CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR
-                                          | CL_MEM_ALLOC_HOST_PTR,
-                                      sizeof(inData), (void *)inData, &err);
-        if (NULL == inBuf || err)
+        cl_mem inBuf = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR, sizeof( inData ), (void*) inData, &err );
+        if( NULL == inBuf || err )
-            log_error("Error:  could not create input buffer in "
-                      "DetectFloatToHalfRoundingMode  (err: %d)",
-                      err);
+            log_error( "Error:  could not create input buffer in DetectFloatToHalfRoundingMode  (err: %d)", err );
             return err;
-        // Create a small output image
+    // Create a small output image
         cl_image_format fmt = { CL_RGBA, CL_HALF_FLOAT };
-        cl_mem outImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &fmt,
-                                          count, 1, 0, NULL, &err);
-        if (NULL == outImage || err)
+        cl_mem outImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &fmt, count, 1, 0, NULL, &err );
+        if( NULL == outImage || err )
-            log_error("Error:  could not create half float out image in "
-                      "DetectFloatToHalfRoundingMode  (err: %d)",
-                      err);
-            clReleaseMemObject(inBuf);
+            log_error( "Error:  could not create half float out image in DetectFloatToHalfRoundingMode  (err: %d)", err );
+            clReleaseMemObject( inBuf );
             return err;
-        // Create our program, and a kernel
-        const char *kernelSource[1] = {
-            "kernel void detect_round( global float4 *in, write_only image2d_t "
-            "out )\n"
+    // Create our program, and a kernel
+        const char *kernel[1] = {
+            "kernel void detect_round( global float4 *in, write_only image2d_t out )\n"
-            "   write_imagef( out, (int2)(get_global_id(0),0), "
-            "in[get_global_id(0)] );\n"
-            "}\n"
-        };
+            "   write_imagef( out, (int2)(get_global_id(0),0), in[get_global_id(0)] );\n"
+            "}\n" };
         clProgramWrapper program;
-        clKernelWrapper kernel;
-        err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                          kernelSource, "detect_round");
+        err = create_single_kernel_helper_create_program(context, &program, 1, kernel);
-        if (NULL == program || err)
+        if( NULL == program || err )
-            log_error("Error:  could not create program in "
-                      "DetectFloatToHalfRoundingMode (err: %d)",
-                      err);
-            clReleaseMemObject(inBuf);
-            clReleaseMemObject(outImage);
+            log_error( "Error:  could not create program in DetectFloatToHalfRoundingMode (err: %d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
             return err;
         cl_device_id device = NULL;
-        err = clGetCommandQueueInfo(q, CL_QUEUE_DEVICE, sizeof(device), &device,
-                                    NULL);
-        if (err)
+        err = clGetCommandQueueInfo( q, CL_QUEUE_DEVICE, sizeof(device), &device, NULL );
+        if( err )
-            log_error("Error:  could not get device from command queue in "
-                      "DetectFloatToHalfRoundingMode  (%d)",
-                      err);
-            clReleaseMemObject(inBuf);
-            clReleaseMemObject(outImage);
+            log_error( "Error:  could not get device from command queue in DetectFloatToHalfRoundingMode  (%d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
             return err;
-        err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inBuf);
-        if (err)
+        err = clBuildProgram( program, 1, &device, "", NULL, NULL );
+        if( err )
-            log_error("Error: could not set argument 0 of kernel in "
-                      "DetectFloatToHalfRoundingMode (%d)",
-                      err);
-            clReleaseMemObject(inBuf);
-            clReleaseMemObject(outImage);
+            log_error( "Error:  could not build program in DetectFloatToHalfRoundingMode  (%d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
             return err;
-        err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outImage);
-        if (err)
+        cl_kernel k = clCreateKernel( program, "detect_round", &err );
+        if( NULL == k || err )
-            log_error("Error: could not set argument 1 of kernel in "
-                      "DetectFloatToHalfRoundingMode (%d)",
-                      err);
-            clReleaseMemObject(inBuf);
-            clReleaseMemObject(outImage);
+            log_error( "Error:  could not create kernel in DetectFloatToHalfRoundingMode  (%d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
             return err;
-        // Run the kernel
+        err = clSetKernelArg( k, 0, sizeof( cl_mem ), &inBuf );
+        if( err )
+        {
+            log_error( "Error: could not set argument 0 of kernel in DetectFloatToHalfRoundingMode (%d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
+            clReleaseKernel( k );
+            return err;
+        }
+        err = clSetKernelArg( k, 1, sizeof( cl_mem ), &outImage );
+        if( err )
+        {
+            log_error( "Error: could not set argument 1 of kernel in DetectFloatToHalfRoundingMode (%d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
+            clReleaseKernel( k );
+            return err;
+        }
+    // Run the kernel
         size_t global_work_size = count;
-        err = clEnqueueNDRangeKernel(q, kernel, 1, NULL, &global_work_size,
-                                     NULL, 0, NULL, NULL);
-        if (err)
+        err = clEnqueueNDRangeKernel( q, k, 1, NULL, &global_work_size, NULL, 0, NULL, NULL );
+        if( err )
-            log_error("Error: could not enqueue kernel in "
-                      "DetectFloatToHalfRoundingMode (%d)",
-                      err);
-            clReleaseMemObject(inBuf);
-            clReleaseMemObject(outImage);
+            log_error( "Error: could not enqueue kernel in DetectFloatToHalfRoundingMode (%d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
+            clReleaseKernel( k );
             return err;
-        // read the results
-        cl_half outBuf[count * 4];
-        memset(outBuf, -1, sizeof(outBuf));
-        size_t origin[3] = { 0, 0, 0 };
-        size_t region[3] = { count, 1, 1 };
-        err = clEnqueueReadImage(q, outImage, CL_TRUE, origin, region, 0, 0,
-                                 outBuf, 0, NULL, NULL);
-        if (err)
+    // read the results
+        cl_ushort outBuf[count*4];
+        memset( outBuf, -1, sizeof( outBuf ) );
+        size_t origin[3] = {0,0,0};
+        size_t region[3] = {count,1,1};
+        err = clEnqueueReadImage( q, outImage, CL_TRUE, origin, region, 0, 0, outBuf, 0, NULL, NULL );
+        if( err )
-            log_error("Error: could not read output image in "
-                      "DetectFloatToHalfRoundingMode (%d)",
-                      err);
-            clReleaseMemObject(inBuf);
-            clReleaseMemObject(outImage);
+            log_error( "Error: could not read output image in DetectFloatToHalfRoundingMode (%d)", err );
+            clReleaseMemObject( inBuf );
+            clReleaseMemObject( outImage );
+            clReleaseKernel( k );
             return err;
-        // Generate our list of reference results
-        cl_half rte_ref[count * 4];
-        cl_half rtz_ref[count * 4];
-        for (size_t i = 0; i < 4 * count; i++)
+    // Generate our list of reference results
+        cl_ushort rte_ref[count*4];
+        cl_ushort rtz_ref[count*4];
+        for( size_t i = 0; i < 4 * count; i++ )
-            rte_ref[i] = cl_half_from_float(inp[i], CL_HALF_RTE);
-            rtz_ref[i] = cl_half_from_float(inp[i], CL_HALF_RTZ);
+            rte_ref[i] = float2half_rte( inp[i] );
+            rtz_ref[i] = float2half_rtz( inp[i] );
-        // Verify that we got something in either rtz or rte mode
-        if (0 == memcmp(rte_ref, outBuf, sizeof(rte_ref)))
+    // Verify that we got something in either rtz or rte mode
+        if( 0 == memcmp( rte_ref, outBuf, sizeof( rte_ref )) )
-            log_info("Autodetected float->half rounding mode to be rte\n");
+            log_info( "Autodetected float->half rounding mode to be rte\n" );
             gFloatToHalfRoundingMode = kRoundToNearestEven;
-        else if (0 == memcmp(rtz_ref, outBuf, sizeof(rtz_ref)))
+        else if ( 0 == memcmp( rtz_ref, outBuf, sizeof( rtz_ref )) )
-            log_info("Autodetected float->half rounding mode to be rtz\n");
+            log_info( "Autodetected float->half rounding mode to be rtz\n" );
             gFloatToHalfRoundingMode = kRoundTowardZero;
-            log_error("ERROR: float to half conversions proceed with invalid "
-                      "rounding mode!\n");
-            log_info("\nfor:");
-            for (size_t i = 0; i < count; i++)
-                log_info(" {%a, %a, %a, %a},", inp[4 * i], inp[4 * i + 1],
-                         inp[4 * i + 2], inp[4 * i + 3]);
-            log_info("\ngot:");
-            for (size_t i = 0; i < count; i++)
-                log_info(" {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},",
-                         outBuf[4 * i], outBuf[4 * i + 1], outBuf[4 * i + 2],
-                         outBuf[4 * i + 3]);
-            log_info("\nrte:");
-            for (size_t i = 0; i < count; i++)
-                log_info(" {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},",
-                         rte_ref[4 * i], rte_ref[4 * i + 1], rte_ref[4 * i + 2],
-                         rte_ref[4 * i + 3]);
-            log_info("\nrtz:");
-            for (size_t i = 0; i < count; i++)
-                log_info(" {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},",
-                         rtz_ref[4 * i], rtz_ref[4 * i + 1], rtz_ref[4 * i + 2],
-                         rtz_ref[4 * i + 3]);
-            log_info("\n");
+            log_error( "ERROR: float to half conversions proceed with invalid rounding mode!\n" );
+            log_info( "\nfor:" );
+            for( size_t i = 0; i < count; i++ )
+                log_info( " {%a, %a, %a, %a},", inp[4*i], inp[4*i+1], inp[4*i+2], inp[4*i+3] );
+            log_info( "\ngot:" );
+            for( size_t i = 0; i < count; i++ )
+                log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", outBuf[4*i], outBuf[4*i+1], outBuf[4*i+2], outBuf[4*i+3] );
+            log_info( "\nrte:" );
+            for( size_t i = 0; i < count; i++ )
+                log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rte_ref[4*i], rte_ref[4*i+1], rte_ref[4*i+2], rte_ref[4*i+3] );
+            log_info( "\nrtz:" );
+            for( size_t i = 0; i < count; i++ )
+                log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rtz_ref[4*i], rtz_ref[4*i+1], rtz_ref[4*i+2], rtz_ref[4*i+3] );
+            log_info( "\n" );
             err = -1;
-            gFloatToHalfRoundingMode = kRoundingModeCount; // illegal value
+            gFloatToHalfRoundingMode = kRoundingModeCount;  // illegal value
-        // clean up
-        clReleaseMemObject(inBuf);
-        clReleaseMemObject(outImage);
+    // clean up
+        clReleaseMemObject( inBuf );
+        clReleaseMemObject( outImage );
+        clReleaseKernel( k );
         return err;
-    // Make sure that the rounding mode was successfully detected, if we checked
-    // earlier
-    if (gFloatToHalfRoundingMode != kRoundToNearestEven
-        && gFloatToHalfRoundingMode != kRoundTowardZero)
+    // Make sure that the rounding mode was successfully detected, if we checked earlier
+    if( gFloatToHalfRoundingMode != kRoundToNearestEven && gFloatToHalfRoundingMode != kRoundTowardZero)
         return -2;
     return err;
-char *create_random_image_data(ExplicitType dataType,
-                               image_descriptor *imageInfo,
-                               BufferOwningPtr<char> &P, MTdata d,
-                               bool image2DFromBuffer)
+char *create_random_image_data( ExplicitType dataType, image_descriptor *imageInfo, BufferOwningPtr<char> &P, MTdata d, bool image2DFromBuffer )
-    size_t allocSize, numPixels;
-    if (/*gTestMipmaps*/ imageInfo->num_mip_levels > 1)
-    {
-        allocSize = (size_t)(compute_mipmapped_image_size(*imageInfo) * 4
-                             * get_explicit_type_size(dataType))
-            / get_pixel_size(imageInfo->format);
-        numPixels = allocSize / (get_explicit_type_size(dataType) * 4);
-    }
-    else
-    {
-        numPixels = (image2DFromBuffer ? imageInfo->rowPitch : imageInfo->width)
-            * imageInfo->height * (imageInfo->depth ? imageInfo->depth : 1)
-            * (imageInfo->arraySize ? imageInfo->arraySize : 1);
-        allocSize = numPixels * 4 * get_explicit_type_size(dataType);
-    }
+  size_t allocSize, numPixels;
+  if ( /*gTestMipmaps*/ imageInfo->num_mip_levels > 1 )
+  {
+    allocSize = (size_t) (compute_mipmapped_image_size(*imageInfo) * 4 * get_explicit_type_size( dataType ))/get_pixel_size(imageInfo->format);
+    numPixels = allocSize / (get_explicit_type_size( dataType ) * 4);
+  }
+  else
+  {
+    numPixels = (image2DFromBuffer? imageInfo->rowPitch: imageInfo->width) * imageInfo->height
+      * (imageInfo->depth ? imageInfo->depth : 1)
+      * (imageInfo->arraySize ? imageInfo->arraySize : 1);
+    allocSize = numPixels * 4 * get_explicit_type_size( dataType );
+  }
 #if 0 // DEBUG
@@ -3192,201 +3150,199 @@
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
     char *data = NULL;
-    if (gDeviceType == CL_DEVICE_TYPE_CPU)
-    {
-        size_t mapSize =
-            ((allocSize + 4095L) & -4096L) + 8192; // alloc two extra pages.
+    if (gDeviceType == CL_DEVICE_TYPE_CPU) {
+      size_t mapSize = ((allocSize + 4095L) & -4096L) + 8192; // alloc two extra pages.
-        void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE,
-                         MAP_ANON | MAP_PRIVATE, 0, 0);
-        if (map == MAP_FAILED)
-        {
-            perror("create_random_image_data: mmap");
-            log_error("%s:%d: mmap failed, mapSize = %zu\n", __FILE__, __LINE__,
-                      mapSize);
-        }
-        intptr_t data_end = (intptr_t)map + mapSize - 4096;
-        data = (char *)(data_end - (intptr_t)allocSize);
+      void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+      if (map == MAP_FAILED)
+      {
+        perror("create_random_image_data: mmap");
+        log_error("%s:%d: mmap failed, mapSize = %zu\n",__FILE__,__LINE__,mapSize);
+      }
+      intptr_t data_end = (intptr_t)map + mapSize - 4096;
+      data = (char *)(data_end - (intptr_t)allocSize);
-        mprotect(map, 4096, PROT_NONE);
-        mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
-        P.reset(data, map, mapSize);
-    }
-    else
-    {
-        data = (char *)malloc(allocSize);
-        P.reset(data);
+      mprotect(map, 4096, PROT_NONE);
+      mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
+      P.reset(data, map, mapSize);
+    } else {
+      data = (char *)malloc(allocSize);
+      P.reset(data);
-    char *data =
-        (char *)align_malloc(allocSize, get_pixel_alignment(imageInfo->format));
-    P.reset(data, NULL, 0, allocSize, true);
+    char *data = (char *)align_malloc(allocSize, get_pixel_size(imageInfo->format));
+    P.reset(data,NULL,0,allocSize,true);
-    if (data == NULL)
-    {
-        log_error(
-            "ERROR: Unable to malloc %lu bytes for create_random_image_data\n",
-            allocSize);
+    if (data == NULL) {
+        log_error( "ERROR: Unable to malloc %lu bytes for create_random_image_data\n", allocSize );
         return NULL;
-    switch (dataType)
+    switch( dataType )
-        case kFloat: {
+        case kFloat:
+        {
             float *inputValues = (float *)data;
             switch (imageInfo->format->image_channel_data_type)
-                case CL_HALF_FLOAT: {
-                    // Generate data that is (mostly) inside the range of a half
-                    // float const float HALF_MIN = 5.96046448e-08f;
-                    const float HALF_MAX = 65504.0f;
-                    size_t i = 0;
-                    inputValues[i++] = 0.f;
-                    inputValues[i++] = 1.f;
-                    inputValues[i++] = -1.f;
-                    inputValues[i++] = 2.f;
-                    for (; i < numPixels * 4; i++)
-                        inputValues[i] = get_random_float(-HALF_MAX - 2.f,
-                                                          HALF_MAX + 2.f, d);
-                }
-                break;
-                case CL_SFIXED14_APPLE: {
-                    size_t i = 0;
-                    if (numPixels * 4 >= 8)
+                case CL_HALF_FLOAT:
-                        inputValues[i++] = INFINITY;
-                        inputValues[i++] = 0x1.0p14f;
-                        inputValues[i++] = 0x1.0p31f;
-                        inputValues[i++] = 0x1.0p32f;
-                        inputValues[i++] = -INFINITY;
-                        inputValues[i++] = -0x1.0p14f;
-                        inputValues[i++] = -0x1.0p31f;
-                        inputValues[i++] = -0x1.1p31f;
+                        // Generate data that is (mostly) inside the range of a half float
+                        // const float HALF_MIN = 5.96046448e-08f;
+                        const float HALF_MAX = 65504.0f;
+                        size_t i = 0;
+                        inputValues[ i++ ] = 0.f;
+                        inputValues[ i++ ] = 1.f;
+                        inputValues[ i++ ] = -1.f;
+                        inputValues[ i++ ] = 2.f;
+                        for( ; i < numPixels * 4; i++ )
+                            inputValues[ i ] = get_random_float( -HALF_MAX - 2.f, HALF_MAX + 2.f, d );
-                    for (; i < numPixels * 4; i++)
-                        inputValues[i] = get_random_float(-1.1f, 3.1f, d);
-                }
-                break;
+                    break;
+                case CL_SFIXED14_APPLE:
+                    {
+                        size_t i = 0;
+                        if( numPixels * 4 >= 8 )
+                        {
+                            inputValues[ i++ ] = INFINITY;
+                            inputValues[ i++ ] = 0x1.0p14f;
+                            inputValues[ i++ ] = 0x1.0p31f;
+                            inputValues[ i++ ] = 0x1.0p32f;
+                            inputValues[ i++ ] = -INFINITY;
+                            inputValues[ i++ ] = -0x1.0p14f;
+                            inputValues[ i++ ] = -0x1.0p31f;
+                            inputValues[ i++ ] = -0x1.1p31f;
+                        }
+                        for( ; i < numPixels * 4; i++ )
+                            inputValues[ i ] = get_random_float( -1.1f, 3.1f, d );
+                    }
+                    break;
-                case CL_FLOAT: {
-                    size_t i = 0;
-                    inputValues[i++] = INFINITY;
-                    inputValues[i++] = -INFINITY;
-                    inputValues[i++] = 0.0f;
-                    inputValues[i++] = 0.0f;
-                    cl_uint *p = (cl_uint *)data;
-                    for (; i < numPixels * 4; i++) p[i] = genrand_int32(d);
-                }
-                break;
+                case CL_FLOAT:
+                    {
+                        size_t i = 0;
+                        inputValues[ i++ ] = INFINITY;
+                        inputValues[ i++ ] = -INFINITY;
+                        inputValues[ i++ ] = 0.0f;
+                        inputValues[ i++ ] = 0.0f;
+                        cl_uint *p = (cl_uint *)data;
+                        for( ; i < numPixels * 4; i++ )
+                            p[ i ] = genrand_int32(d);
+                    }
+                    break;
                     size_t i = 0;
-                    if (numPixels * 4 >= 36)
+                    if( numPixels * 4 >= 36 )
-                        inputValues[i++] = 0.0f;
-                        inputValues[i++] = 0.5f;
-                        inputValues[i++] = 31.5f;
-                        inputValues[i++] = 32.0f;
-                        inputValues[i++] = 127.5f;
-                        inputValues[i++] = 128.0f;
-                        inputValues[i++] = 255.5f;
-                        inputValues[i++] = 256.0f;
-                        inputValues[i++] = 1023.5f;
-                        inputValues[i++] = 1024.0f;
-                        inputValues[i++] = 32767.5f;
-                        inputValues[i++] = 32768.0f;
-                        inputValues[i++] = 65535.5f;
-                        inputValues[i++] = 65536.0f;
-                        inputValues[i++] = 2147483648.0f;
-                        inputValues[i++] = 4294967296.0f;
-                        inputValues[i++] = MAKE_HEX_FLOAT(0x1.0p63f, 1, 63);
-                        inputValues[i++] = MAKE_HEX_FLOAT(0x1.0p64f, 1, 64);
-                        inputValues[i++] = -0.0f;
-                        inputValues[i++] = -0.5f;
-                        inputValues[i++] = -31.5f;
-                        inputValues[i++] = -32.0f;
-                        inputValues[i++] = -127.5f;
-                        inputValues[i++] = -128.0f;
-                        inputValues[i++] = -255.5f;
-                        inputValues[i++] = -256.0f;
-                        inputValues[i++] = -1023.5f;
-                        inputValues[i++] = -1024.0f;
-                        inputValues[i++] = -32767.5f;
-                        inputValues[i++] = -32768.0f;
-                        inputValues[i++] = -65535.5f;
-                        inputValues[i++] = -65536.0f;
-                        inputValues[i++] = -2147483648.0f;
-                        inputValues[i++] = -4294967296.0f;
-                        inputValues[i++] = -MAKE_HEX_FLOAT(0x1.0p63f, 1, 63);
-                        inputValues[i++] = -MAKE_HEX_FLOAT(0x1.0p64f, 1, 64);
+                        inputValues[ i++ ] = 0.0f;
+                        inputValues[ i++ ] = 0.5f;
+                        inputValues[ i++ ] = 31.5f;
+                        inputValues[ i++ ] = 32.0f;
+                        inputValues[ i++ ] = 127.5f;
+                        inputValues[ i++ ] = 128.0f;
+                        inputValues[ i++ ] = 255.5f;
+                        inputValues[ i++ ] = 256.0f;
+                        inputValues[ i++ ] = 1023.5f;
+                        inputValues[ i++ ] = 1024.0f;
+                        inputValues[ i++ ] = 32767.5f;
+                        inputValues[ i++ ] = 32768.0f;
+                        inputValues[ i++ ] = 65535.5f;
+                        inputValues[ i++ ] = 65536.0f;
+                        inputValues[ i++ ] = 2147483648.0f;
+                        inputValues[ i++ ] = 4294967296.0f;
+                        inputValues[ i++ ] = MAKE_HEX_FLOAT( 0x1.0p63f, 1, 63 );
+                        inputValues[ i++ ] = MAKE_HEX_FLOAT( 0x1.0p64f, 1, 64 );
+                        inputValues[ i++ ] = -0.0f;
+                        inputValues[ i++ ] = -0.5f;
+                        inputValues[ i++ ] = -31.5f;
+                        inputValues[ i++ ] = -32.0f;
+                        inputValues[ i++ ] = -127.5f;
+                        inputValues[ i++ ] = -128.0f;
+                        inputValues[ i++ ] = -255.5f;
+                        inputValues[ i++ ] = -256.0f;
+                        inputValues[ i++ ] = -1023.5f;
+                        inputValues[ i++ ] = -1024.0f;
+                        inputValues[ i++ ] = -32767.5f;
+                        inputValues[ i++ ] = -32768.0f;
+                        inputValues[ i++ ] = -65535.5f;
+                        inputValues[ i++ ] = -65536.0f;
+                        inputValues[ i++ ] = -2147483648.0f;
+                        inputValues[ i++ ] = -4294967296.0f;
+                        inputValues[ i++ ] = -MAKE_HEX_FLOAT( 0x1.0p63f, 1, 63 );
+                        inputValues[ i++ ] = -MAKE_HEX_FLOAT( 0x1.0p64f, 1, 64 );
-                    if (is_format_signed(imageInfo->format))
+                    if( is_format_signed(imageInfo->format) )
-                        for (; i < numPixels * 4; i++)
-                            inputValues[i] = get_random_float(-1.1f, 1.1f, d);
+                        for( ; i < numPixels * 4; i++ )
+                            inputValues[ i ] = get_random_float( -1.1f, 1.1f, d );
-                        for (; i < numPixels * 4; i++)
-                            inputValues[i] = get_random_float(-0.1f, 1.1f, d);
+                        for( ; i < numPixels * 4; i++ )
+                            inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
-        case kInt: {
+        case kInt:
+        {
             int *imageData = (int *)data;
             // We want to generate ints (mostly) in range of the target format
-            int formatMin = get_format_min_int(imageInfo->format);
-            size_t formatMax = get_format_max_int(imageInfo->format);
-            if (formatMin == 0)
+            int formatMin = get_format_min_int( imageInfo->format );
+            size_t formatMax = get_format_max_int( imageInfo->format );
+            if( formatMin == 0 )
-                // Unsigned values, but we are only an int, so cap the actual
-                // max at the max of signed ints
-                if (formatMax > 2147483647L) formatMax = 2147483647L;
+                // Unsigned values, but we are only an int, so cap the actual max at the max of signed ints
+                if( formatMax > 2147483647L )
+                    formatMax = 2147483647L;
-            // If the final format is small enough, give us a bit of room for
-            // out-of-range values to test
-            if (formatMax < 2147483647L) formatMax += 2;
-            if (formatMin > -2147483648LL) formatMin -= 2;
+            // If the final format is small enough, give us a bit of room for out-of-range values to test
+            if( formatMax < 2147483647L )
+                formatMax += 2;
+            if( formatMin > -2147483648LL )
+                formatMin -= 2;
             // Now gen
-            for (size_t i = 0; i < numPixels * 4; i++)
+            for( size_t i = 0; i < numPixels * 4; i++ )
-                imageData[i] = random_in_range(formatMin, (int)formatMax, d);
+                imageData[ i ] = random_in_range( formatMin, (int)formatMax, d );
         case kUInt:
-        case kUnsignedInt: {
+        case kUnsignedInt:
+        {
             unsigned int *imageData = (unsigned int *)data;
             // We want to generate ints (mostly) in range of the target format
-            int formatMin = get_format_min_int(imageInfo->format);
-            size_t formatMax = get_format_max_int(imageInfo->format);
-            if (formatMin < 0) formatMin = 0;
-            // If the final format is small enough, give us a bit of room for
-            // out-of-range values to test
-            if (formatMax < 4294967295LL) formatMax += 2;
+            int formatMin = get_format_min_int( imageInfo->format );
+            size_t formatMax = get_format_max_int( imageInfo->format );
+            if( formatMin < 0 )
+                formatMin = 0;
+            // If the final format is small enough, give us a bit of room for out-of-range values to test
+            if( formatMax < 4294967295LL )
+                formatMax += 2;
             // Now gen
-            for (size_t i = 0; i < numPixels * 4; i++)
+            for( size_t i = 0; i < numPixels * 4; i++ )
-                imageData[i] = random_in_range(formatMin, (int)formatMax, d);
+                imageData[ i ] = random_in_range( formatMin, (int)formatMax, d );
             // Unsupported source format
-            delete[] data;
+            delete [] data;
             return NULL;
@@ -3395,8 +3351,7 @@
-bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t
-max, int &outValue )
+bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t max, int &outValue )
     int v = (int)value;
@@ -3416,8 +3371,8 @@
             return false;
-            log_info( "ERROR: unimplemented for CL_ADDRESS_MIRRORED_REPEAT. Do
-we ever use this? exit(-1);
+            log_info( "ERROR: unimplemented for CL_ADDRESS_MIRRORED_REPEAT. Do we ever use this?
+            exit(-1);
             if( v < 0 )
@@ -3437,203 +3392,170 @@
-void get_sampler_kernel_code(image_sampler_data *imageSampler, char *outLine)
+void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine )
     const char *normalized;
     const char *addressMode;
     const char *filterMode;
-    if (imageSampler->addressing_mode == CL_ADDRESS_CLAMP)
+    if( imageSampler->addressing_mode == CL_ADDRESS_CLAMP )
         addressMode = "CLK_ADDRESS_CLAMP";
-    else if (imageSampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE)
+    else if( imageSampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE )
         addressMode = "CLK_ADDRESS_CLAMP_TO_EDGE";
-    else if (imageSampler->addressing_mode == CL_ADDRESS_REPEAT)
+    else if( imageSampler->addressing_mode == CL_ADDRESS_REPEAT )
         addressMode = "CLK_ADDRESS_REPEAT";
-    else if (imageSampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT)
+    else if( imageSampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT )
         addressMode = "CLK_ADDRESS_MIRRORED_REPEAT";
-    else if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
+    else if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
         addressMode = "CLK_ADDRESS_NONE";
-        log_error("**Error: Unknown addressing mode! Aborting...\n");
+        log_error( "**Error: Unknown addressing mode! Aborting...\n" );
-    if (imageSampler->normalized_coords)
+    if( imageSampler->normalized_coords )
         normalized = "CLK_NORMALIZED_COORDS_TRUE";
         normalized = "CLK_NORMALIZED_COORDS_FALSE";
-    if (imageSampler->filter_mode == CL_FILTER_LINEAR)
+    if( imageSampler->filter_mode == CL_FILTER_LINEAR )
         filterMode = "CLK_FILTER_LINEAR";
         filterMode = "CLK_FILTER_NEAREST";
-    sprintf(outLine, "    const sampler_t imageSampler = %s | %s | %s;\n",
-            addressMode, filterMode, normalized);
+    sprintf( outLine, "    const sampler_t imageSampler = %s | %s | %s;\n", addressMode, filterMode, normalized );
-void copy_image_data(image_descriptor *srcImageInfo,
-                     image_descriptor *dstImageInfo, void *imageValues,
-                     void *destImageValues, const size_t sourcePos[],
-                     const size_t destPos[], const size_t regionSize[])
+void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, void *imageValues, void *destImageValues,
+                     const size_t sourcePos[], const size_t destPos[], const size_t regionSize[] )
-    //  assert( srcImageInfo->format == dstImageInfo->format );
+  //  assert( srcImageInfo->format == dstImageInfo->format );
-    size_t src_mip_level_offset = 0, dst_mip_level_offset = 0;
-    size_t sourcePos_lod[3], destPos_lod[3], src_lod, dst_lod;
-    size_t src_row_pitch_lod, src_slice_pitch_lod;
-    size_t dst_row_pitch_lod, dst_slice_pitch_lod;
+  size_t src_mip_level_offset = 0, dst_mip_level_offset = 0;
+  size_t sourcePos_lod[3], destPos_lod[3], src_lod, dst_lod;
+  size_t src_row_pitch_lod, src_slice_pitch_lod;
+  size_t dst_row_pitch_lod, dst_slice_pitch_lod;
-    size_t pixelSize = get_pixel_size(srcImageInfo->format);
+  size_t pixelSize = get_pixel_size( srcImageInfo->format );
-    sourcePos_lod[0] = sourcePos[0];
-    sourcePos_lod[1] = sourcePos[1];
-    sourcePos_lod[2] = sourcePos[2];
-    destPos_lod[0] = destPos[0];
-    destPos_lod[1] = destPos[1];
-    destPos_lod[2] = destPos[2];
-    src_row_pitch_lod = srcImageInfo->rowPitch;
-    dst_row_pitch_lod = dstImageInfo->rowPitch;
-    src_slice_pitch_lod = srcImageInfo->slicePitch;
-    dst_slice_pitch_lod = dstImageInfo->slicePitch;
+  sourcePos_lod[0] = sourcePos[0];
+  sourcePos_lod[1] = sourcePos[1];
+  sourcePos_lod[2] = sourcePos[2];
+  destPos_lod[0] = destPos[0];
+  destPos_lod[1] = destPos[1];
+  destPos_lod[2] = destPos[2];
+  src_row_pitch_lod = srcImageInfo->rowPitch;
+  dst_row_pitch_lod = dstImageInfo->rowPitch;
+  src_slice_pitch_lod = srcImageInfo->slicePitch;
+  dst_slice_pitch_lod = dstImageInfo->slicePitch;
-    if (srcImageInfo->num_mip_levels > 1)
+  if( srcImageInfo->num_mip_levels > 1)
+  {
+    size_t src_width_lod = 1/*srcImageInfo->width*/;
+    size_t src_height_lod = 1/*srcImageInfo->height*/;
+    size_t src_depth_lod = 1/*srcImageInfo->depth*/;
+    switch( srcImageInfo->type )
-        size_t src_width_lod = 1 /*srcImageInfo->width*/;
-        size_t src_height_lod = 1 /*srcImageInfo->height*/;
-        size_t src_depth_lod = 1 /*srcImageInfo->depth*/;
+      src_lod = sourcePos[1];
+      sourcePos_lod[1] = sourcePos_lod[2] = 0;
+      src_width_lod = (srcImageInfo->width >> src_lod ) ? ( srcImageInfo->width >> src_lod ): 1;
+      break;
+      src_lod = sourcePos[2];
+      sourcePos_lod[1] = sourcePos[1];
+      sourcePos_lod[2] = 0;
+      src_width_lod = (srcImageInfo->width >> src_lod ) ? ( srcImageInfo->width >> src_lod ): 1;
+      if( srcImageInfo->type == CL_MEM_OBJECT_IMAGE2D )
+        src_height_lod = (srcImageInfo->height >> src_lod ) ? ( srcImageInfo->height >> src_lod ): 1;
+      break;
+      src_lod = sourcePos[3];
+      sourcePos_lod[1] = sourcePos[1];
+      sourcePos_lod[2] = sourcePos[2];
+      src_width_lod = (srcImageInfo->width >> src_lod ) ? ( srcImageInfo->width >> src_lod ): 1;
+      src_height_lod = (srcImageInfo->height >> src_lod ) ? ( srcImageInfo->height >> src_lod ): 1;
+      if( srcImageInfo->type == CL_MEM_OBJECT_IMAGE3D )
+        src_depth_lod = (srcImageInfo->depth >> src_lod ) ? ( srcImageInfo->depth >> src_lod ): 1;
+      break;
-        switch (srcImageInfo->type)
-        {
-            case CL_MEM_OBJECT_IMAGE1D:
-                src_lod = sourcePos[1];
-                sourcePos_lod[1] = sourcePos_lod[2] = 0;
-                src_width_lod = (srcImageInfo->width >> src_lod)
-                    ? (srcImageInfo->width >> src_lod)
-                    : 1;
-                break;
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-            case CL_MEM_OBJECT_IMAGE2D:
-                src_lod = sourcePos[2];
-                sourcePos_lod[1] = sourcePos[1];
-                sourcePos_lod[2] = 0;
-                src_width_lod = (srcImageInfo->width >> src_lod)
-                    ? (srcImageInfo->width >> src_lod)
-                    : 1;
-                if (srcImageInfo->type == CL_MEM_OBJECT_IMAGE2D)
-                    src_height_lod = (srcImageInfo->height >> src_lod)
-                        ? (srcImageInfo->height >> src_lod)
-                        : 1;
-                break;
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-            case CL_MEM_OBJECT_IMAGE3D:
-                src_lod = sourcePos[3];
-                sourcePos_lod[1] = sourcePos[1];
-                sourcePos_lod[2] = sourcePos[2];
-                src_width_lod = (srcImageInfo->width >> src_lod)
-                    ? (srcImageInfo->width >> src_lod)
-                    : 1;
-                src_height_lod = (srcImageInfo->height >> src_lod)
-                    ? (srcImageInfo->height >> src_lod)
-                    : 1;
-                if (srcImageInfo->type == CL_MEM_OBJECT_IMAGE3D)
-                    src_depth_lod = (srcImageInfo->depth >> src_lod)
-                        ? (srcImageInfo->depth >> src_lod)
-                        : 1;
-                break;
-        }
-        src_mip_level_offset = compute_mip_level_offset(srcImageInfo, src_lod);
-        src_row_pitch_lod =
-            src_width_lod * get_pixel_size(srcImageInfo->format);
-        src_slice_pitch_lod = src_row_pitch_lod * src_height_lod;
+    }
+    src_mip_level_offset = compute_mip_level_offset( srcImageInfo, src_lod );
+    src_row_pitch_lod = src_width_lod * get_pixel_size( srcImageInfo->format );
+    src_slice_pitch_lod = src_row_pitch_lod * src_height_lod;
+  }
+  if( dstImageInfo->num_mip_levels > 1)
+  {
+    size_t dst_width_lod = 1/*dstImageInfo->width*/;
+    size_t dst_height_lod = 1/*dstImageInfo->height*/;
+    size_t dst_depth_lod = 1 /*dstImageInfo->depth*/;
+    switch( dstImageInfo->type )
+    {
+      dst_lod = destPos[1];
+      destPos_lod[1] = destPos_lod[2] = 0;
+      dst_width_lod = (dstImageInfo->width >> dst_lod ) ? ( dstImageInfo->width >> dst_lod ): 1;
+      break;
+      dst_lod = destPos[2];
+      destPos_lod[1] = destPos[1];
+      destPos_lod[2] = 0;
+      dst_width_lod = (dstImageInfo->width >> dst_lod ) ? ( dstImageInfo->width >> dst_lod ): 1;
+      if( dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D )
+        dst_height_lod = (dstImageInfo->height >> dst_lod ) ? ( dstImageInfo->height >> dst_lod ): 1;
+      break;
+      dst_lod = destPos[3];
+      destPos_lod[1] = destPos[1];
+      destPos_lod[2] = destPos[2];
+      dst_width_lod = (dstImageInfo->width >> dst_lod ) ? ( dstImageInfo->width >> dst_lod ): 1;
+      dst_height_lod = (dstImageInfo->height >> dst_lod ) ? ( dstImageInfo->height >> dst_lod ): 1;
+      if( dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D )
+        dst_depth_lod = (dstImageInfo->depth >> dst_lod ) ? ( dstImageInfo->depth >> dst_lod ): 1;
+      break;
+    }
+    dst_mip_level_offset = compute_mip_level_offset( dstImageInfo, dst_lod );
+    dst_row_pitch_lod = dst_width_lod * get_pixel_size( dstImageInfo->format);
+    dst_slice_pitch_lod = dst_row_pitch_lod * dst_height_lod;
+  }
+  // Get initial pointers
+  char *sourcePtr = (char *)imageValues + sourcePos_lod[ 2 ] * src_slice_pitch_lod + sourcePos_lod[ 1 ] * src_row_pitch_lod + pixelSize * sourcePos_lod[ 0 ] + src_mip_level_offset;
+  char *destPtr = (char *)destImageValues + destPos_lod[ 2 ] * dst_slice_pitch_lod + destPos_lod[ 1 ] * dst_row_pitch_lod + pixelSize * destPos_lod[ 0 ] + dst_mip_level_offset;
+  for( size_t z = 0; z < ( regionSize[ 2 ] > 0 ? regionSize[ 2 ] : 1 ); z++ )
+  {
+    char *rowSourcePtr = sourcePtr;
+    char *rowDestPtr = destPtr;
+    for( size_t y = 0; y < regionSize[ 1 ]; y++ )
+    {
+      memcpy( rowDestPtr, rowSourcePtr, pixelSize * regionSize[ 0 ] );
+      rowSourcePtr += src_row_pitch_lod;
+      rowDestPtr += dst_row_pitch_lod;
-    if (dstImageInfo->num_mip_levels > 1)
-    {
-        size_t dst_width_lod = 1 /*dstImageInfo->width*/;
-        size_t dst_height_lod = 1 /*dstImageInfo->height*/;
-        size_t dst_depth_lod = 1 /*dstImageInfo->depth*/;
-        switch (dstImageInfo->type)
-        {
-            case CL_MEM_OBJECT_IMAGE1D:
-                dst_lod = destPos[1];
-                destPos_lod[1] = destPos_lod[2] = 0;
-                dst_width_lod = (dstImageInfo->width >> dst_lod)
-                    ? (dstImageInfo->width >> dst_lod)
-                    : 1;
-                break;
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-            case CL_MEM_OBJECT_IMAGE2D:
-                dst_lod = destPos[2];
-                destPos_lod[1] = destPos[1];
-                destPos_lod[2] = 0;
-                dst_width_lod = (dstImageInfo->width >> dst_lod)
-                    ? (dstImageInfo->width >> dst_lod)
-                    : 1;
-                if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D)
-                    dst_height_lod = (dstImageInfo->height >> dst_lod)
-                        ? (dstImageInfo->height >> dst_lod)
-                        : 1;
-                break;
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-            case CL_MEM_OBJECT_IMAGE3D:
-                dst_lod = destPos[3];
-                destPos_lod[1] = destPos[1];
-                destPos_lod[2] = destPos[2];
-                dst_width_lod = (dstImageInfo->width >> dst_lod)
-                    ? (dstImageInfo->width >> dst_lod)
-                    : 1;
-                dst_height_lod = (dstImageInfo->height >> dst_lod)
-                    ? (dstImageInfo->height >> dst_lod)
-                    : 1;
-                if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D)
-                    dst_depth_lod = (dstImageInfo->depth >> dst_lod)
-                        ? (dstImageInfo->depth >> dst_lod)
-                        : 1;
-                break;
-        }
-        dst_mip_level_offset = compute_mip_level_offset(dstImageInfo, dst_lod);
-        dst_row_pitch_lod =
-            dst_width_lod * get_pixel_size(dstImageInfo->format);
-        dst_slice_pitch_lod = dst_row_pitch_lod * dst_height_lod;
-    }
-    // Get initial pointers
-    char *sourcePtr = (char *)imageValues
-        + sourcePos_lod[2] * src_slice_pitch_lod
-        + sourcePos_lod[1] * src_row_pitch_lod + pixelSize * sourcePos_lod[0]
-        + src_mip_level_offset;
-    char *destPtr = (char *)destImageValues
-        + destPos_lod[2] * dst_slice_pitch_lod
-        + destPos_lod[1] * dst_row_pitch_lod + pixelSize * destPos_lod[0]
-        + dst_mip_level_offset;
-    for (size_t z = 0; z < (regionSize[2] > 0 ? regionSize[2] : 1); z++)
-    {
-        char *rowSourcePtr = sourcePtr;
-        char *rowDestPtr = destPtr;
-        for (size_t y = 0; y < regionSize[1]; y++)
-        {
-            memcpy(rowDestPtr, rowSourcePtr, pixelSize * regionSize[0]);
-            rowSourcePtr += src_row_pitch_lod;
-            rowDestPtr += dst_row_pitch_lod;
-        }
-        sourcePtr += src_slice_pitch_lod;
-        destPtr += dst_slice_pitch_lod;
-    }
+    sourcePtr += src_slice_pitch_lod;
+    destPtr += dst_slice_pitch_lod;
+  }
 float random_float(float low, float high, MTdata d)
-    float t = (float)genrand_real1(d);
+    float t = (float) genrand_real1(d);
     return (1.0f - t) * low + t * high;
-CoordWalker::CoordWalker(void *coords, bool useFloats, size_t vecSize)
+CoordWalker::CoordWalker( void * coords, bool useFloats, size_t vecSize )
-    if (useFloats)
+    if( useFloats )
         mFloatCoords = (cl_float *)coords;
         mIntCoords = NULL;
@@ -3646,470 +3568,380 @@
     mVecSize = vecSize;
-CoordWalker::~CoordWalker() {}
-cl_float CoordWalker::Get(size_t idx, size_t el)
-    if (mIntCoords != NULL)
-        return (cl_float)mIntCoords[idx * mVecSize + el];
+cl_float CoordWalker::Get( size_t idx, size_t el )
+    if( mIntCoords != NULL )
+        return (cl_float)mIntCoords[ idx * mVecSize + el ];
-        return mFloatCoords[idx * mVecSize + el];
+        return mFloatCoords[ idx * mVecSize + el ];
-void print_read_header(const cl_image_format *format,
-                       image_sampler_data *sampler, bool err, int t)
+void print_read_header( cl_image_format *format, image_sampler_data *sampler, bool err, int t )
     const char *addressMode = NULL;
     const char *normalizedNames[2] = { "UNNORMALIZED", "NORMALIZED" };
-    if (sampler->addressing_mode == CL_ADDRESS_CLAMP)
+    if( sampler->addressing_mode == CL_ADDRESS_CLAMP )
         addressMode = "CL_ADDRESS_CLAMP";
-    else if (sampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE)
+    else if( sampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE )
         addressMode = "CL_ADDRESS_CLAMP_TO_EDGE";
-    else if (sampler->addressing_mode == CL_ADDRESS_REPEAT)
+    else if( sampler->addressing_mode == CL_ADDRESS_REPEAT )
         addressMode = "CL_ADDRESS_REPEAT";
-    else if (sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT)
+    else if( sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT )
         addressMode = "CL_ADDRESS_MIRRORED_REPEAT";
         addressMode = "CL_ADDRESS_NONE";
-    if (t)
+    if( t )
-        if (err)
-            log_error("[%-7s %-24s %d] - %s - %s - %s - %s\n",
-                      GetChannelOrderName(format->image_channel_order),
-                      GetChannelTypeName(format->image_channel_data_type),
-                      (int)get_format_channel_count(format),
-                      sampler->filter_mode == CL_FILTER_NEAREST
-                          ? "CL_FILTER_NEAREST"
-                          : "CL_FILTER_LINEAR",
+        if( err )
+            log_error( "[%-7s %-24s %d] - %s - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ),
+                      GetChannelTypeName( format->image_channel_data_type ),
+                      (int)get_format_channel_count( format ),
+                      sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR",
                       normalizedNames[sampler->normalized_coords ? 1 : 0],
-                      t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED");
+                      t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED" );
-            log_info("[%-7s %-24s %d] - %s - %s - %s - %s\n",
-                     GetChannelOrderName(format->image_channel_order),
-                     GetChannelTypeName(format->image_channel_data_type),
-                     (int)get_format_channel_count(format),
-                     sampler->filter_mode == CL_FILTER_NEAREST
-                         ? "CL_FILTER_NEAREST"
-                         : "CL_FILTER_LINEAR",
+            log_info( "[%-7s %-24s %d] - %s - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ),
+                     GetChannelTypeName( format->image_channel_data_type ),
+                     (int)get_format_channel_count( format ),
+                     sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR",
                      normalizedNames[sampler->normalized_coords ? 1 : 0],
-                     t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED");
+                     t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED" );
-        if (err)
-            log_error("[%-7s %-24s %d] - %s - %s - %s\n",
-                      GetChannelOrderName(format->image_channel_order),
-                      GetChannelTypeName(format->image_channel_data_type),
-                      (int)get_format_channel_count(format),
-                      sampler->filter_mode == CL_FILTER_NEAREST
-                          ? "CL_FILTER_NEAREST"
-                          : "CL_FILTER_LINEAR",
+        if( err )
+            log_error( "[%-7s %-24s %d] - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ),
+                      GetChannelTypeName( format->image_channel_data_type ),
+                      (int)get_format_channel_count( format ),
+                      sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR",
-                      normalizedNames[sampler->normalized_coords ? 1 : 0]);
+                      normalizedNames[sampler->normalized_coords ? 1 : 0] );
-            log_info("[%-7s %-24s %d] - %s - %s - %s\n",
-                     GetChannelOrderName(format->image_channel_order),
-                     GetChannelTypeName(format->image_channel_data_type),
-                     (int)get_format_channel_count(format),
-                     sampler->filter_mode == CL_FILTER_NEAREST
-                         ? "CL_FILTER_NEAREST"
-                         : "CL_FILTER_LINEAR",
+            log_info( "[%-7s %-24s %d] - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ),
+                     GetChannelTypeName( format->image_channel_data_type ),
+                     (int)get_format_channel_count( format ),
+                     sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR",
-                     normalizedNames[sampler->normalized_coords ? 1 : 0]);
+                     normalizedNames[sampler->normalized_coords ? 1 : 0] );
-void print_write_header(const cl_image_format *format, bool err = false)
+void print_write_header( cl_image_format *format, bool err = false)
-    if (err)
-        log_error("[%-7s %-24s %d]\n",
-                  GetChannelOrderName(format->image_channel_order),
-                  GetChannelTypeName(format->image_channel_data_type),
-                  (int)get_format_channel_count(format));
+    if( err )
+        log_error( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ),
+                  GetChannelTypeName( format->image_channel_data_type ),
+                  (int)get_format_channel_count( format ) );
-        log_info("[%-7s %-24s %d]\n",
-                 GetChannelOrderName(format->image_channel_order),
-                 GetChannelTypeName(format->image_channel_data_type),
-                 (int)get_format_channel_count(format));
+        log_info( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ),
+                 GetChannelTypeName( format->image_channel_data_type ),
+                 (int)get_format_channel_count( format ) );
-void print_header(const cl_image_format *format, bool err = false)
+void print_header( cl_image_format *format, bool err = false )
-    if (err)
-    {
-        log_error("[%-7s %-24s %d]\n",
-                  GetChannelOrderName(format->image_channel_order),
-                  GetChannelTypeName(format->image_channel_data_type),
-                  (int)get_format_channel_count(format));
-    }
-    else
-    {
-        log_info("[%-7s %-24s %d]\n",
-                 GetChannelOrderName(format->image_channel_order),
-                 GetChannelTypeName(format->image_channel_data_type),
-                 (int)get_format_channel_count(format));
+    if (err) {
+        log_error( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ),
+                  GetChannelTypeName( format->image_channel_data_type ),
+                  (int)get_format_channel_count( format ) );
+    } else {
+        log_info( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ),
+                 GetChannelTypeName( format->image_channel_data_type ),
+                 (int)get_format_channel_count( format ) );
-bool find_format(cl_image_format *formatList, unsigned int numFormats,
-                 cl_image_format *formatToFind)
+bool find_format( cl_image_format *formatList, unsigned int numFormats, cl_image_format *formatToFind )
-    for (unsigned int i = 0; i < numFormats; i++)
+    for( unsigned int i = 0; i < numFormats; i++ )
-        if (formatList[i].image_channel_order
-                == formatToFind->image_channel_order
-            && formatList[i].image_channel_data_type
-                == formatToFind->image_channel_data_type)
+        if( formatList[ i ].image_channel_order == formatToFind->image_channel_order &&
+           formatList[ i ].image_channel_data_type == formatToFind->image_channel_data_type )
             return true;
     return false;
-void build_required_image_formats(
-    cl_mem_flags flags, cl_mem_object_type image_type, cl_device_id device,
-    std::vector<cl_image_format> &formatsToSupport)
+void build_required_image_formats(cl_mem_flags flags,
+                                  cl_mem_object_type image_type,
+                                  cl_device_id device,
+                                  std::vector<cl_image_format>& formatsToSupport)
-    formatsToSupport.clear();
+	Version version = get_device_cl_version(device);
-    // Minimum list of supported image formats for reading or writing (embedded
-    // profile)
-    static std::vector<cl_image_format> embeddedProfile_readOrWrite{
-        // clang-format off
-        { CL_RGBA, CL_UNORM_INT8 },
-        { CL_RGBA, CL_UNORM_INT16 },
-        { CL_RGBA, CL_SIGNED_INT8 },
-        { CL_RGBA, CL_SIGNED_INT16 },
-        { CL_RGBA, CL_SIGNED_INT32 },
-        { CL_RGBA, CL_UNSIGNED_INT8 },
-        { CL_RGBA, CL_UNSIGNED_INT16 },
-        { CL_RGBA, CL_UNSIGNED_INT32 },
-        { CL_RGBA, CL_HALF_FLOAT },
-        { CL_RGBA, CL_FLOAT },
-        // clang-format on
-    };
+	formatsToSupport.clear();
-    // Minimum list of required image formats for reading or writing
-    // num_channels, for all image types.
-    static std::vector<cl_image_format> fullProfile_readOrWrite{
-        // clang-format off
-        { CL_RGBA, CL_UNORM_INT8 },
-        { CL_RGBA, CL_UNORM_INT16 },
-        { CL_RGBA, CL_SIGNED_INT8 },
-        { CL_RGBA, CL_SIGNED_INT16 },
-        { CL_RGBA, CL_SIGNED_INT32 },
-        { CL_RGBA, CL_UNSIGNED_INT8 },
-        { CL_RGBA, CL_UNSIGNED_INT16 },
-        { CL_RGBA, CL_UNSIGNED_INT32 },
-        { CL_RGBA, CL_HALF_FLOAT },
-        { CL_RGBA, CL_FLOAT },
-        { CL_BGRA, CL_UNORM_INT8 },
-        // clang-format on
-    };
+	// Required embedded formats.
+	static std::vector<cl_image_format> embeddedProfReadOrWriteFormats
+	{
+	};
-    // Minimum list of supported image formats for reading or writing
-    // (OpenCL 2.0, 2.1, or 2.2), for all image types.
-    static std::vector<cl_image_format> fullProfile_2x_readOrWrite{
-        // clang-format off
-        { CL_R, CL_UNORM_INT8 },
-        { CL_R, CL_UNORM_INT16 },
-        { CL_R, CL_SNORM_INT8 },
-        { CL_R, CL_SNORM_INT16 },
-        { CL_R, CL_SIGNED_INT8 },
-        { CL_R, CL_SIGNED_INT16 },
-        { CL_R, CL_SIGNED_INT32 },
-        { CL_R, CL_UNSIGNED_INT8 },
-        { CL_R, CL_UNSIGNED_INT16 },
-        { CL_R, CL_UNSIGNED_INT32 },
-        { CL_R, CL_HALF_FLOAT },
-        { CL_R, CL_FLOAT },
-        { CL_RG, CL_UNORM_INT8 },
-        { CL_RG, CL_UNORM_INT16 },
-        { CL_RG, CL_SNORM_INT8 },
-        { CL_RG, CL_SNORM_INT16 },
-        { CL_RG, CL_SIGNED_INT8 },
-        { CL_RG, CL_SIGNED_INT16 },
-        { CL_RG, CL_SIGNED_INT32 },
-        { CL_RG, CL_UNSIGNED_INT8 },
-        { CL_RG, CL_UNSIGNED_INT16 },
-        { CL_RG, CL_UNSIGNED_INT32 },
-        { CL_RG, CL_HALF_FLOAT },
-        { CL_RG, CL_FLOAT },
-        { CL_RGBA, CL_UNORM_INT8 },
-        { CL_RGBA, CL_UNORM_INT16 },
-        { CL_RGBA, CL_SNORM_INT8 },
-        { CL_RGBA, CL_SNORM_INT16 },
-        { CL_RGBA, CL_SIGNED_INT8 },
-        { CL_RGBA, CL_SIGNED_INT16 },
-        { CL_RGBA, CL_SIGNED_INT32 },
-        { CL_RGBA, CL_UNSIGNED_INT8 },
-        { CL_RGBA, CL_UNSIGNED_INT16 },
-        { CL_RGBA, CL_UNSIGNED_INT32 },
-        { CL_RGBA, CL_HALF_FLOAT },
-        { CL_RGBA, CL_FLOAT },
-        { CL_BGRA, CL_UNORM_INT8 },
-        // clang-format on
-    };
+	/*
+		Required full profile formats.
+		This array does not contain any full profile
+		formats that have restrictions on when they
+		are required.
+	*/
+	static std::vector<cl_image_format> fullProfReadOrWriteFormats
+	{
+	};
-    // Conditional addition to the 2x readOrWrite table:
-    // Support for the CL_DEPTH image channel order is required only for 2D
-    // images and 2D image arrays.
-    static std::vector<cl_image_format> fullProfile_2x_readOrWrite_Depth{
-        // clang-format off
-        { CL_DEPTH, CL_UNORM_INT16 },
-        { CL_DEPTH, CL_FLOAT },
-        // clang-format on
-    };
+	/*
+		Required full profile formats specifically for 2.x.
+		This array does not contain any full profile
+		formats that have restrictions on when they
+		are required.
+	*/
+	static std::vector<cl_image_format> fullProf2XReadOrWriteFormats
+	{
+		{ CL_R, CL_UNORM_INT8 },
+		{ CL_R, CL_UNORM_INT16 },
+		{ CL_R, CL_SNORM_INT8 },
+		{ CL_R, CL_SNORM_INT16 },
+		{ CL_R, CL_SIGNED_INT8 },
+		{ CL_R, CL_SIGNED_INT16 },
+		{ CL_R, CL_SIGNED_INT32 },
+		{ CL_R, CL_FLOAT },
+		{ CL_RG, CL_UNORM_INT8 },
+		{ CL_RG, CL_UNORM_INT16 },
+		{ CL_RG, CL_SNORM_INT8 },
+		{ CL_RG, CL_SNORM_INT16 },
+		{ CL_RG, CL_SIGNED_INT16 },
+		{ CL_RG, CL_SIGNED_INT32 },
+		{ CL_RG, CL_FLOAT },
+	};
-    // Conditional addition to the 2x readOrWrite table:
-    // Support for reading from the CL_sRGBA image channel order is optional for
-    // 1D image buffers. Support for writing to the CL_sRGBA image channel order
-    // is optional for all image types.
-    static std::vector<cl_image_format> fullProfile_2x_readOrWrite_srgb{
-        { CL_sRGBA, CL_UNORM_INT8 },
-    };
+	/*
+		Required full profile formats for CL_DEPTH
+		(specifically 2.x).
+		There are cases whereby the format isn't required.
+	*/
+	static std::vector<cl_image_format> fullProf2XReadOrWriteDepthFormats
+	{
+	};
-    // Minimum list of required image formats for reading and writing.
-    static std::vector<cl_image_format> fullProfile_readAndWrite{
-        // clang-format off
-        { CL_R, CL_UNORM_INT8 },
-        { CL_R, CL_SIGNED_INT8 },
-        { CL_R, CL_SIGNED_INT16 },
-        { CL_R, CL_SIGNED_INT32 },
-        { CL_R, CL_UNSIGNED_INT8 },
-        { CL_R, CL_UNSIGNED_INT16 },
-        { CL_R, CL_UNSIGNED_INT32 },
-        { CL_R, CL_HALF_FLOAT },
-        { CL_R, CL_FLOAT },
-        { CL_RGBA, CL_UNORM_INT8 },
-        { CL_RGBA, CL_SIGNED_INT8 },
-        { CL_RGBA, CL_SIGNED_INT16 },
-        { CL_RGBA, CL_SIGNED_INT32 },
-        { CL_RGBA, CL_UNSIGNED_INT8 },
-        { CL_RGBA, CL_UNSIGNED_INT16 },
-        { CL_RGBA, CL_UNSIGNED_INT32 },
-        { CL_RGBA, CL_HALF_FLOAT },
-        { CL_RGBA, CL_FLOAT },
-        // clang-format on
-    };
+	/*
+		Required full profile formats for CL_sRGB
+		(specifically 2.x).
+		There are cases whereby the format isn't required.
+	*/
+	static std::vector<cl_image_format> fullProf2XSRGBFormats
+	{
+	};
-    // Embedded profile
-    if (gIsEmbedded)
-    {
-        copy(embeddedProfile_readOrWrite.begin(),
-             embeddedProfile_readOrWrite.end(),
-             back_inserter(formatsToSupport));
-    }
-    // Full profile
-    else
-    {
-        Version version = get_device_cl_version(device);
-        if (version < Version(2, 0) || version >= Version(3, 0))
-        {
-            // Full profile, OpenCL 1.2 or 3.0.
-            if (flags & CL_MEM_KERNEL_READ_AND_WRITE)
-            {
-                // Note: assumes that read-write images are supported!
-                copy(fullProfile_readAndWrite.begin(),
-                     fullProfile_readAndWrite.end(),
-                     back_inserter(formatsToSupport));
-            }
-            else
-            {
-                copy(fullProfile_readOrWrite.begin(),
-                     fullProfile_readOrWrite.end(),
-                     back_inserter(formatsToSupport));
-            }
-        }
-        else
-        {
-            // Full profile, OpenCL 2.0, 2.1, 2.2.
-            if (flags & CL_MEM_KERNEL_READ_AND_WRITE)
-            {
-                copy(fullProfile_readAndWrite.begin(),
-                     fullProfile_readAndWrite.end(),
-                     back_inserter(formatsToSupport));
-            }
-            else
-            {
-                copy(fullProfile_2x_readOrWrite.begin(),
-                     fullProfile_2x_readOrWrite.end(),
-                     back_inserter(formatsToSupport));
+	// Embedded profile
+	if (gIsEmbedded)
+	{
+		copy(embeddedProfReadOrWriteFormats.begin(),
+		     embeddedProfReadOrWriteFormats.end(),
+		     back_inserter(formatsToSupport));
+	}
+	// Full profile
+	else
+	{
+		copy(fullProfReadOrWriteFormats.begin(),
+		     fullProfReadOrWriteFormats.end(),
+		     back_inserter(formatsToSupport));
+	}
-                // Support for the CL_DEPTH image channel order is required only
-                // for 2D images and 2D image arrays.
-                if (image_type == CL_MEM_OBJECT_IMAGE2D
-                    || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
-                {
-                    copy(fullProfile_2x_readOrWrite_Depth.begin(),
-                         fullProfile_2x_readOrWrite_Depth.end(),
-                         back_inserter(formatsToSupport));
-                }
+	// Full profile, OpenCL 2.0, 2.1, 2.2
+	if (!gIsEmbedded && version >= Version(2, 0) && version <= Version(2, 2))
+	{
+		copy(fullProf2XReadOrWriteFormats.begin(),
+		     fullProf2XReadOrWriteFormats.end(),
+		     back_inserter(formatsToSupport));
-                // Support for reading from the CL_sRGBA image channel order is
-                // optional for 1D image buffers. Support for writing to the
-                // CL_sRGBA image channel order is optional for all image types.
-                if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER
-                    && flags == CL_MEM_READ_ONLY)
-                {
-                    copy(fullProfile_2x_readOrWrite_srgb.begin(),
-                         fullProfile_2x_readOrWrite_srgb.end(),
-                         back_inserter(formatsToSupport));
-                }
-            }
-        }
-    }
+		// Depth images are only required for 2DArray and 2D images
+		if (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D)
+		{
+			copy(fullProf2XReadOrWriteDepthFormats.begin(),
+			     fullProf2XReadOrWriteDepthFormats.end(),
+			     back_inserter(formatsToSupport));
+		}
+		// sRGB is not required for 1DImage Buffers
+		if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER)
+		{
+			// sRGB is only required for reading
+			if (flags == CL_MEM_READ_ONLY)
+			{
+				copy(fullProf2XSRGBFormats.begin(),
+				     fullProf2XSRGBFormats.end(),
+				     back_inserter(formatsToSupport));
+			}
+		}
+	}
-bool is_image_format_required(cl_image_format format, cl_mem_flags flags,
+bool is_image_format_required(cl_image_format format,
+                              cl_mem_flags flags,
                               cl_mem_object_type image_type,
                               cl_device_id device)
-    std::vector<cl_image_format> formatsToSupport;
-    build_required_image_formats(flags, image_type, device, formatsToSupport);
+	std::vector<cl_image_format> formatsToSupport;
+	build_required_image_formats(flags, image_type, device, formatsToSupport);
-    for (auto &formatItr : formatsToSupport)
-    {
-        if (formatItr.image_channel_order == format.image_channel_order
-            && formatItr.image_channel_data_type
-                == format.image_channel_data_type)
-        {
-            return true;
-        }
-    }
+	for (auto &formatItr: formatsToSupport)
+	{
+		if (formatItr.image_channel_order == format.image_channel_order &&
+		    formatItr.image_channel_data_type == format.image_channel_data_type)
+		{
+			return true;
+		}
+	}
-    return false;
+	return false;
-cl_uint compute_max_mip_levels(size_t width, size_t height, size_t depth)
+cl_uint compute_max_mip_levels( size_t width, size_t height, size_t depth)
-    cl_uint retMaxMipLevels = 0, max_dim = 0;
+  cl_uint retMaxMipLevels=0, max_dim = 0;
-    max_dim = width;
-    max_dim = height > max_dim ? height : max_dim;
-    max_dim = depth > max_dim ? depth : max_dim;
+  max_dim = width;
+  max_dim = height > max_dim ? height : max_dim;
+  max_dim = depth > max_dim ? depth : max_dim;
-    while (max_dim)
-    {
-        retMaxMipLevels++;
-        max_dim >>= 1;
-    }
-    return retMaxMipLevels;
+  while(max_dim) {
+    retMaxMipLevels++;
+    max_dim >>= 1;
+  }
+  return retMaxMipLevels;
-cl_ulong compute_mipmapped_image_size(image_descriptor imageInfo)
+cl_ulong compute_mipmapped_image_size( image_descriptor imageInfo)
-    cl_ulong retSize = 0;
-    size_t curr_width, curr_height, curr_depth, curr_array_size;
-    curr_width = imageInfo.width;
-    curr_height = imageInfo.height;
-    curr_depth = imageInfo.depth;
-    curr_array_size = imageInfo.arraySize;
+  cl_ulong retSize = 0;
+  size_t curr_width, curr_height, curr_depth, curr_array_size;
+  curr_width = imageInfo.width;
+  curr_height = imageInfo.height;
+  curr_depth = imageInfo.depth;
+  curr_array_size = imageInfo.arraySize;
-    for (int i = 0; i < (int)imageInfo.num_mip_levels; i++)
+  for (int i=0; i < (int) imageInfo.num_mip_levels; i++)
+  {
+    switch ( imageInfo.type )
-        switch (imageInfo.type)
-        {
-            case CL_MEM_OBJECT_IMAGE3D:
-                retSize += (cl_ulong)curr_width * curr_height * curr_depth
-                    * get_pixel_size(imageInfo.format);
-                break;
-            case CL_MEM_OBJECT_IMAGE2D:
-                retSize += (cl_ulong)curr_width * curr_height
-                    * get_pixel_size(imageInfo.format);
-                break;
-            case CL_MEM_OBJECT_IMAGE1D:
-                retSize +=
-                    (cl_ulong)curr_width * get_pixel_size(imageInfo.format);
-                break;
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                retSize += (cl_ulong)curr_width * curr_array_size
-                    * get_pixel_size(imageInfo.format);
-                break;
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                retSize += (cl_ulong)curr_width * curr_height * curr_array_size
-                    * get_pixel_size(imageInfo.format);
-                break;
-        }
-        switch (imageInfo.type)
-        {
-            case CL_MEM_OBJECT_IMAGE3D:
-                curr_depth = curr_depth >> 1 ? curr_depth >> 1 : 1;
-            case CL_MEM_OBJECT_IMAGE2D:
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                curr_height = curr_height >> 1 ? curr_height >> 1 : 1;
-            case CL_MEM_OBJECT_IMAGE1D:
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                curr_width = curr_width >> 1 ? curr_width >> 1 : 1;
-        }
+      retSize += (cl_ulong)curr_width * curr_height * curr_depth * get_pixel_size(imageInfo.format);
+      break;
+      retSize += (cl_ulong)curr_width * curr_height * get_pixel_size(imageInfo.format);
+      break;
+      retSize += (cl_ulong)curr_width * get_pixel_size(imageInfo.format);
+      break;
+      retSize += (cl_ulong)curr_width * curr_array_size * get_pixel_size(imageInfo.format);
+      break;
+      retSize += (cl_ulong)curr_width * curr_height * curr_array_size * get_pixel_size(imageInfo.format);
+      break;
-    return retSize;
+    switch ( imageInfo.type )
+    {
+      curr_depth = curr_depth >> 1 ? curr_depth >> 1: 1;
+      curr_height = curr_height >> 1? curr_height >> 1 : 1;
+      curr_width = curr_width >> 1? curr_width >> 1 : 1;
+    }
+  }
+  return retSize;
-size_t compute_mip_level_offset(image_descriptor *imageInfo, size_t lod)
+size_t compute_mip_level_offset( image_descriptor * imageInfo , size_t lod)
-    size_t retOffset = 0;
-    size_t width, height, depth;
-    width = imageInfo->width;
-    height = imageInfo->height;
-    depth = imageInfo->depth;
+  size_t retOffset = 0;
+  size_t width, height,  depth;
+  width = imageInfo->width;
+  height = imageInfo->height;
+  depth = imageInfo->depth;
-    for (size_t i = 0; i < lod; i++)
+  for(size_t i=0; i < lod; i++)
+  {
+    switch(imageInfo->type)
-        switch (imageInfo->type)
-        {
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                retOffset += (size_t)width * height * imageInfo->arraySize
-                    * get_pixel_size(imageInfo->format);
-                break;
-            case CL_MEM_OBJECT_IMAGE3D:
-                retOffset += (size_t)width * height * depth
-                    * get_pixel_size(imageInfo->format);
-                break;
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                retOffset += (size_t)width * imageInfo->arraySize
-                    * get_pixel_size(imageInfo->format);
-                break;
-            case CL_MEM_OBJECT_IMAGE2D:
-                retOffset +=
-                    (size_t)width * height * get_pixel_size(imageInfo->format);
-                break;
-            case CL_MEM_OBJECT_IMAGE1D:
-                retOffset += (size_t)width * get_pixel_size(imageInfo->format);
-                break;
-        }
-        // Compute next lod dimensions
-        switch (imageInfo->type)
-        {
-            case CL_MEM_OBJECT_IMAGE3D: depth = (depth >> 1) ? (depth >> 1) : 1;
-            case CL_MEM_OBJECT_IMAGE2D:
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                height = (height >> 1) ? (height >> 1) : 1;
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-            case CL_MEM_OBJECT_IMAGE1D: width = (width >> 1) ? (width >> 1) : 1;
-        }
+      retOffset += (size_t) width * height * imageInfo->arraySize * get_pixel_size( imageInfo->format );
+      break;
+      retOffset += (size_t) width * height * depth * get_pixel_size( imageInfo->format );
+      break;
+      retOffset += (size_t) width * imageInfo->arraySize * get_pixel_size( imageInfo->format );
+      break;
+      retOffset += (size_t) width * height * get_pixel_size( imageInfo->format );
+      break;
+      retOffset += (size_t) width * get_pixel_size( imageInfo->format );
+      break;
-    return retOffset;
-const char *convert_image_type_to_string(cl_mem_object_type image_type)
-    switch (image_type)
+    // Compute next lod dimensions
+    switch(imageInfo->type)
-        case CL_MEM_OBJECT_IMAGE1D: return "1D";
-        case CL_MEM_OBJECT_IMAGE2D: return "2D";
-        case CL_MEM_OBJECT_IMAGE3D: return "3D";
-        case CL_MEM_OBJECT_IMAGE1D_ARRAY: return "1D array";
-        case CL_MEM_OBJECT_IMAGE2D_ARRAY: return "2D array";
-        case CL_MEM_OBJECT_IMAGE1D_BUFFER: return "1D image buffer";
-        default: return "unrecognized object type";
+      depth = ( depth >> 1 ) ? ( depth >> 1 ) : 1;
+      height = ( height >> 1 ) ? ( height >> 1 ) : 1;
+      width = ( width >> 1 ) ? ( width >> 1 ) : 1;
+  }
+  return retOffset;
diff --git a/test_common/harness/imageHelpers.h b/test_common/harness/imageHelpers.h
index 848ec65..26c9760 100644
--- a/test_common/harness/imageHelpers.h
+++ b/test_common/harness/imageHelpers.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -41,13 +41,10 @@
 #include "rounding_mode.h"
 #include "clImageHelper.h"
-#include <CL/cl_half.h>
 extern cl_device_type gDeviceType;
 extern bool gTestRounding;
-// Number of iterations per image format to test if not testing max images,
-// rounding, or small images
+// Number of iterations per image format to test if not testing max images, rounding, or small images
@@ -56,64 +53,51 @@
 // Definition for our own sampler type, to mirror the cl_sampler internals
-typedef struct
-    cl_addressing_mode addressing_mode;
-    cl_filter_mode filter_mode;
-    bool normalized_coords;
+typedef struct {
+ cl_addressing_mode addressing_mode;
+ cl_filter_mode     filter_mode;
+ bool               normalized_coords;
 } image_sampler_data;
-int round_to_even(float v);
+int round_to_even( float v );
-#define NORMALIZE(v, max) (v < 0 ? 0 : (v > 1.f ? max : round_to_even(v * max)))
-#define NORMALIZE_UNROUNDED(v, max) (v < 0 ? 0 : (v > 1.f ? max : v * max))
-#define NORMALIZE_SIGNED(v, min, max)                                          \
-    (v < -1.0f ? min : (v > 1.f ? max : round_to_even(v * max)))
-#define NORMALIZE_SIGNED_UNROUNDED(v, min, max)                                \
-    (v < -1.0f ? min : (v > 1.f ? max : v * max))
-#define CONVERT_INT(v, min, max, max_val)                                      \
-    (v < min ? min : (v > max ? max_val : round_to_even(v)))
-#define CONVERT_UINT(v, max, max_val)                                          \
-    (v < 0 ? 0 : (v > max ? max_val : round_to_even(v)))
+#define NORMALIZE( v, max ) ( v < 0 ? 0 : ( v > 1.f ? max : round_to_even( v * max ) ) )
+#define NORMALIZE_UNROUNDED( v, max ) ( v < 0 ? 0 : ( v > 1.f ? max :  v * max ) )
+#define NORMALIZE_SIGNED( v, min, max ) ( v  < -1.0f ? min : ( v > 1.f ? max : round_to_even( v * max ) ) )
+#define NORMALIZE_SIGNED_UNROUNDED( v, min, max ) ( v  < -1.0f ? min : ( v > 1.f ? max : v * max ) )
+#define CONVERT_INT( v, min, max, max_val)  ( v < min ? min : ( v > max ? max_val : round_to_even( v ) ) )
+#define CONVERT_UINT( v, max, max_val)  ( v < 0 ? 0 : ( v > max ? max_val : round_to_even( v ) ) )
-extern void print_read_header(const cl_image_format *format,
-                              image_sampler_data *sampler, bool err = false,
-                              int t = 0);
-extern void print_write_header(const cl_image_format *format, bool err);
-extern void print_header(const cl_image_format *format, bool err);
-extern bool find_format(cl_image_format *formatList, unsigned int numFormats,
-                        cl_image_format *formatToFind);
-extern bool is_image_format_required(cl_image_format format, cl_mem_flags flags,
+extern void print_read_header( cl_image_format *format, image_sampler_data *sampler, bool err = false, int t = 0 );
+extern void print_write_header( cl_image_format *format, bool err);
+extern void print_header( cl_image_format *format, bool err );
+extern bool find_format( cl_image_format *formatList, unsigned int numFormats, cl_image_format *formatToFind );
+extern bool is_image_format_required(cl_image_format format,
+                                     cl_mem_flags flags,
                                      cl_mem_object_type image_type,
                                      cl_device_id device);
-extern void
-build_required_image_formats(cl_mem_flags flags, cl_mem_object_type image_type,
-                             cl_device_id device,
-                             std::vector<cl_image_format> &formatsToSupport);
+extern void build_required_image_formats(cl_mem_flags flags,
+                                         cl_mem_object_type image_type,
+                                         cl_device_id device,
+                                         std::vector<cl_image_format>& formatsToSupport);
-extern uint32_t get_format_type_size(const cl_image_format *format);
-extern uint32_t get_channel_data_type_size(cl_channel_type channelType);
-extern uint32_t get_format_channel_count(const cl_image_format *format);
-extern uint32_t get_channel_order_channel_count(cl_channel_order order);
-cl_channel_type get_channel_type_from_name(const char *name);
-cl_channel_order get_channel_order_from_name(const char *name);
-extern int is_format_signed(const cl_image_format *format);
-extern uint32_t get_pixel_size(const cl_image_format *format);
+extern size_t get_format_type_size( const cl_image_format *format );
+extern size_t get_channel_data_type_size( cl_channel_type channelType );
+extern size_t get_format_channel_count( const cl_image_format *format );
+extern size_t get_channel_order_channel_count( cl_channel_order order );
+cl_channel_type  get_channel_type_from_name( const char *name );
+cl_channel_order  get_channel_order_from_name( const char *name );
+extern int    is_format_signed( const cl_image_format *format );
+extern size_t get_pixel_size( cl_image_format *format );
 /* Helper to get any ol image format as long as it is 8-bits-per-channel */
-extern int get_8_bit_image_format(cl_context context,
-                                  cl_mem_object_type objType,
-                                  cl_mem_flags flags, size_t channelCount,
-                                  cl_image_format *outFormat);
+extern int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
 /* Helper to get any ol image format as long as it is 32-bits-per-channel */
-extern int get_32_bit_image_format(cl_context context,
-                                   cl_mem_object_type objType,
-                                   cl_mem_flags flags, size_t channelCount,
-                                   cl_image_format *outFormat);
+extern int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
-int random_in_range(int minV, int maxV, MTdata d);
-int random_log_in_range(int minV, int maxV, MTdata d);
+int random_in_range( int minV, int maxV, MTdata d );
+int random_log_in_range( int minV, int maxV, MTdata d );
 typedef struct
@@ -123,7 +107,7 @@
     size_t rowPitch;
     size_t slicePitch;
     size_t arraySize;
-    const cl_image_format *format;
+    cl_image_format *format;
     cl_mem buffer;
     cl_mem_object_type type;
     cl_uint num_mip_levels;
@@ -132,394 +116,376 @@
 typedef struct
     float p[4];
-} FloatPixel;
-void print_first_pixel_difference_error(size_t where, const char *sourcePixel,
-                                        const char *destPixel,
-                                        image_descriptor *imageInfo, size_t y,
-                                        size_t thirdDim);
 void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
-                   size_t sizes[][3], size_t maxWidth, size_t maxHeight,
-                   size_t maxDepth, size_t maxArraySize,
-                   const cl_ulong maxIndividualAllocSize,
-                   const cl_ulong maxTotalAllocSize,
-                   cl_mem_object_type image_type, const cl_image_format *format,
-                   int usingMaxPixelSize = 0);
-extern size_t get_format_max_int(const cl_image_format *format);
+                   size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
+                   const cl_ulong maxIndividualAllocSize, const cl_ulong maxTotalAllocSize, cl_mem_object_type image_type, cl_image_format *format, int usingMaxPixelSize=0);
+extern size_t get_format_max_int( cl_image_format *format );
-extern cl_ulong get_image_size(image_descriptor const *imageInfo);
-extern cl_ulong get_image_size_mb(image_descriptor const *imageInfo);
+extern cl_ulong get_image_size( image_descriptor const *imageInfo );
+extern cl_ulong get_image_size_mb( image_descriptor const *imageInfo );
-extern char *generate_random_image_data(image_descriptor *imageInfo,
-                                        BufferOwningPtr<char> &Owner, MTdata d);
+extern char * generate_random_image_data( image_descriptor *imageInfo, BufferOwningPtr<char> &Owner, MTdata d );
-extern int debug_find_vector_in_image(void *imagePtr,
-                                      image_descriptor *imageInfo,
-                                      void *vectorToFind, size_t vectorSize,
-                                      int *outX, int *outY, int *outZ,
-                                      size_t lod = 0);
+extern int debug_find_vector_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                      void *vectorToFind, size_t vectorSize, int *outX, int *outY, int *outZ, size_t lod = 0 );
-extern int debug_find_pixel_in_image(void *imagePtr,
-                                     image_descriptor *imageInfo,
-                                     unsigned int *valuesToFind, int *outX,
-                                     int *outY, int *outZ, int lod = 0);
-extern int debug_find_pixel_in_image(void *imagePtr,
-                                     image_descriptor *imageInfo,
-                                     int *valuesToFind, int *outX, int *outY,
-                                     int *outZ, int lod = 0);
-extern int debug_find_pixel_in_image(void *imagePtr,
-                                     image_descriptor *imageInfo,
-                                     float *valuesToFind, int *outX, int *outY,
-                                     int *outZ, int lod = 0);
+extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                     unsigned int *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
+extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                     int *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
+extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                     float *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
-extern void copy_image_data(image_descriptor *srcImageInfo,
-                            image_descriptor *dstImageInfo, void *imageValues,
-                            void *destImageValues, const size_t sourcePos[],
-                            const size_t destPos[], const size_t regionSize[]);
+extern void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, void *imageValues, void *destImageValues,
+                            const size_t sourcePos[], const size_t destPos[], const size_t regionSize[] );
-int has_alpha(const cl_image_format *format);
+int has_alpha(cl_image_format *format);
 extern bool is_sRGBA_order(cl_channel_order image_channel_order);
-inline float calculate_array_index(float coord, float extent);
+inline float calculate_array_index( float coord, float extent );
-cl_uint compute_max_mip_levels(size_t width, size_t height, size_t depth);
-cl_ulong compute_mipmapped_image_size(image_descriptor imageInfo);
-size_t compute_mip_level_offset(image_descriptor *imageInfo, size_t lod);
+cl_uint compute_max_mip_levels( size_t width, size_t height, size_t depth);
+cl_ulong compute_mipmapped_image_size( image_descriptor imageInfo);
+size_t compute_mip_level_offset( image_descriptor * imageInfo , size_t lod);
-template <class T>
-void read_image_pixel(void *imageData, image_descriptor *imageInfo, int x,
-                      int y, int z, T *outData, int lod)
+template <class T> void read_image_pixel( void *imageData, image_descriptor *imageInfo,
+                                         int x, int y, int z, T *outData, int lod )
-    size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
-           depth_lod = imageInfo->depth,
-           slice_pitch_lod = 0 /*imageInfo->slicePitch*/,
-           row_pitch_lod = 0 /*imageInfo->rowPitch*/;
-    width_lod = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
+    float convert_half_to_float( unsigned short halfValue );
+    size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth, slice_pitch_lod = 0/*imageInfo->slicePitch*/ , row_pitch_lod = 0/*imageInfo->rowPitch*/;
+    width_lod = ( imageInfo->width >> lod) ?( imageInfo->width >> lod):1;
-    if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY
-        && imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
-        height_lod =
-            (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
+    if ( imageInfo->type  != CL_MEM_OBJECT_IMAGE1D_ARRAY && imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
+        height_lod = ( imageInfo->height >> lod) ?( imageInfo->height >> lod):1;
-    if (imageInfo->type == CL_MEM_OBJECT_IMAGE3D)
-        depth_lod = (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
-    row_pitch_lod = (imageInfo->num_mip_levels > 0)
-        ? (width_lod * get_pixel_size(imageInfo->format))
-        : imageInfo->rowPitch;
-    slice_pitch_lod = (imageInfo->num_mip_levels > 0)
-        ? (row_pitch_lod * height_lod)
-        : imageInfo->slicePitch;
+    if(imageInfo->type == CL_MEM_OBJECT_IMAGE3D)
+       depth_lod = ( imageInfo->depth >> lod) ? ( imageInfo->depth >> lod) : 1;
+    row_pitch_lod = (imageInfo->num_mip_levels > 0)? (width_lod * get_pixel_size( imageInfo->format )): imageInfo->rowPitch;
+    slice_pitch_lod = (imageInfo->num_mip_levels > 0)? (row_pitch_lod * height_lod): imageInfo->slicePitch;
     // correct depth_lod and height_lod for array image types in order to avoid
     // return
-    if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY && height_lod == 1
-        && depth_lod == 1)
-    {
-        depth_lod = 0;
-        height_lod = 0;
+    if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY && height_lod == 1 && depth_lod == 1) {
+    depth_lod = 0;
+    height_lod = 0;
-    if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY && depth_lod == 1)
-    {
-        depth_lod = 0;
+    if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY && depth_lod == 1) {
+      depth_lod = 0;
-    if (x < 0 || x >= (int)width_lod
-        || (height_lod != 0 && (y < 0 || y >= (int)height_lod))
-        || (depth_lod != 0 && (z < 0 || z >= (int)depth_lod))
-        || (imageInfo->arraySize != 0
-            && (z < 0 || z >= (int)imageInfo->arraySize)))
+    if ( x < 0 || x >= (int)width_lod
+               || ( height_lod != 0 && ( y < 0 || y >= (int)height_lod ) )
+               || ( depth_lod != 0 && ( z < 0 || z >= (int)depth_lod ) )
+               || ( imageInfo->arraySize != 0 && ( z < 0 || z >= (int)imageInfo->arraySize ) ) )
         // Border color
         if (imageInfo->format->image_channel_order == CL_DEPTH)
-            outData[0] = 1;
+            outData[ 0 ] = 1;
-        else
-        {
-            outData[0] = outData[1] = outData[2] = outData[3] = 0;
-            if (!has_alpha(imageInfo->format)) outData[3] = 1;
+        else {
+            outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0;
+            if (!has_alpha(imageInfo->format))
+                outData[3] = 1;
-    const cl_image_format *format = imageInfo->format;
+    cl_image_format *format = imageInfo->format;
     unsigned int i;
-    T tempData[4];
+    T tempData[ 4 ];
     // Advance to the right spot
     char *ptr = (char *)imageData;
-    size_t pixelSize = get_pixel_size(format);
+    size_t pixelSize = get_pixel_size( format );
     ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;
     // OpenCL only supports reading floats from certain formats
-    switch (format->image_channel_data_type)
+    switch( format->image_channel_data_type )
-        case CL_SNORM_INT8: {
+        case CL_SNORM_INT8:
+        {
             cl_char *dPtr = (cl_char *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
-        case CL_UNORM_INT8: {
+        case CL_UNORM_INT8:
+        {
             cl_uchar *dPtr = (cl_uchar *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
-        case CL_SIGNED_INT8: {
+        case CL_SIGNED_INT8:
+        {
             cl_char *dPtr = (cl_char *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
-        case CL_UNSIGNED_INT8: {
-            cl_uchar *dPtr = (cl_uchar *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+        case CL_UNSIGNED_INT8:
+        {
+            cl_uchar *dPtr = (cl_uchar*)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
-        case CL_SNORM_INT16: {
+        case CL_SNORM_INT16:
+        {
             cl_short *dPtr = (cl_short *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
-        case CL_UNORM_INT16: {
+        case CL_UNORM_INT16:
+        {
             cl_ushort *dPtr = (cl_ushort *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
-        case CL_SIGNED_INT16: {
+        case CL_SIGNED_INT16:
+        {
             cl_short *dPtr = (cl_short *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
-        case CL_UNSIGNED_INT16: {
+        case CL_UNSIGNED_INT16:
+        {
             cl_ushort *dPtr = (cl_ushort *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
-        case CL_HALF_FLOAT: {
-            cl_half *dPtr = (cl_half *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)cl_half_to_float(dPtr[i]);
+        case CL_HALF_FLOAT:
+        {
+            cl_ushort *dPtr = (cl_ushort *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)convert_half_to_float( dPtr[ i ] );
-        case CL_SIGNED_INT32: {
+        case CL_SIGNED_INT32:
+        {
             cl_int *dPtr = (cl_int *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
-        case CL_UNSIGNED_INT32: {
+        case CL_UNSIGNED_INT32:
+        {
             cl_uint *dPtr = (cl_uint *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
-        case CL_UNORM_SHORT_565: {
-            cl_ushort *dPtr = (cl_ushort *)ptr;
-            tempData[0] = (T)(dPtr[0] >> 11);
-            tempData[1] = (T)((dPtr[0] >> 5) & 63);
-            tempData[2] = (T)(dPtr[0] & 31);
+        case CL_UNORM_SHORT_565:
+        {
+            cl_ushort *dPtr = (cl_ushort*)ptr;
+            tempData[ 0 ] = (T)( dPtr[ 0 ] >> 11 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 );
+            tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 );
-        case CL_UNORM_SHORT_565_REV: {
+        case CL_UNORM_SHORT_565_REV:
+        {
             unsigned short *dPtr = (unsigned short *)ptr;
-            tempData[2] = (T)(dPtr[0] >> 11);
-            tempData[1] = (T)((dPtr[0] >> 5) & 63);
-            tempData[0] = (T)(dPtr[0] & 31);
+            tempData[ 2 ] = (T)( dPtr[ 0 ] >> 11 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 );
-        case CL_UNORM_SHORT_555_REV: {
+        case CL_UNORM_SHORT_555_REV:
+        {
             unsigned short *dPtr = (unsigned short *)ptr;
-            tempData[2] = (T)((dPtr[0] >> 10) & 31);
-            tempData[1] = (T)((dPtr[0] >> 5) & 31);
-            tempData[0] = (T)(dPtr[0] & 31);
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 31 );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 );
-        case CL_UNORM_INT_8888: {
+        case CL_UNORM_INT_8888:
+        {
             unsigned int *dPtr = (unsigned int *)ptr;
-            tempData[3] = (T)(dPtr[0] >> 24);
-            tempData[2] = (T)((dPtr[0] >> 16) & 0xff);
-            tempData[1] = (T)((dPtr[0] >> 8) & 0xff);
-            tempData[0] = (T)(dPtr[0] & 0xff);
+            tempData[ 3 ] = (T)( dPtr[ 0 ] >> 24 );
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 0xff );
-        case CL_UNORM_INT_8888_REV: {
+        case CL_UNORM_INT_8888_REV:
+        {
             unsigned int *dPtr = (unsigned int *)ptr;
-            tempData[0] = (T)(dPtr[0] >> 24);
-            tempData[1] = (T)((dPtr[0] >> 16) & 0xff);
-            tempData[2] = (T)((dPtr[0] >> 8) & 0xff);
-            tempData[3] = (T)(dPtr[0] & 0xff);
+            tempData[ 0 ] = (T)( dPtr[ 0 ] >> 24 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff );
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff );
+            tempData[ 3 ] = (T)( dPtr[ 0 ] & 0xff );
-        case CL_UNORM_INT_101010_REV: {
+        case CL_UNORM_INT_101010_REV:
+        {
             unsigned int *dPtr = (unsigned int *)ptr;
-            tempData[2] = (T)((dPtr[0] >> 20) & 0x3ff);
-            tempData[1] = (T)((dPtr[0] >> 10) & 0x3ff);
-            tempData[0] = (T)(dPtr[0] & 0x3ff);
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 0x3ff );
-        case CL_UNORM_SHORT_555: {
+        case CL_UNORM_SHORT_555:
+        {
             cl_ushort *dPtr = (cl_ushort *)ptr;
-            tempData[0] = (T)((dPtr[0] >> 10) & 31);
-            tempData[1] = (T)((dPtr[0] >> 5) & 31);
-            tempData[2] = (T)(dPtr[0] & 31);
+            tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 31 );
+            tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 );
-        case CL_UNORM_INT_101010: {
+        case CL_UNORM_INT_101010:
+        {
             cl_uint *dPtr = (cl_uint *)ptr;
-            tempData[0] = (T)((dPtr[0] >> 20) & 0x3ff);
-            tempData[1] = (T)((dPtr[0] >> 10) & 0x3ff);
-            tempData[2] = (T)(dPtr[0] & 0x3ff);
+            tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff );
+            tempData[ 2 ] = (T)( dPtr[ 0 ] & 0x3ff );
-        case CL_FLOAT: {
+        case CL_FLOAT:
+        {
             cl_float *dPtr = (cl_float *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i];
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
-        case CL_SFIXED14_APPLE: {
+        case CL_SFIXED14_APPLE:
+        {
             cl_float *dPtr = (cl_float *)ptr;
-            for (i = 0; i < get_format_channel_count(format); i++)
-                tempData[i] = (T)dPtr[i] + 0x4000;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ] + 0x4000;
-    outData[0] = outData[1] = outData[2] = 0;
-    outData[3] = 1;
+    outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = 0;
+    outData[ 3 ] = 1;
-    if (format->image_channel_order == CL_A)
+    if( format->image_channel_order == CL_A )
-        outData[3] = tempData[0];
+        outData[ 3 ] = tempData[ 0 ];
-    else if (format->image_channel_order == CL_R)
+    else if( format->image_channel_order == CL_R   )
-        outData[0] = tempData[0];
+        outData[ 0 ] = tempData[ 0 ];
-    else if (format->image_channel_order == CL_Rx)
+    else if( format->image_channel_order == CL_Rx   )
-        outData[0] = tempData[0];
+        outData[ 0 ] = tempData[ 0 ];
-    else if (format->image_channel_order == CL_RA)
+    else if( format->image_channel_order == CL_RA )
-        outData[0] = tempData[0];
-        outData[3] = tempData[1];
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 3 ] = tempData[ 1 ];
-    else if (format->image_channel_order == CL_RG)
+    else if( format->image_channel_order == CL_RG  )
-        outData[0] = tempData[0];
-        outData[1] = tempData[1];
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
-    else if (format->image_channel_order == CL_RGx)
+    else if( format->image_channel_order == CL_RGx  )
-        outData[0] = tempData[0];
-        outData[1] = tempData[1];
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
-    else if ((format->image_channel_order == CL_RGB)
-             || (format->image_channel_order == CL_sRGB))
+    else if(( format->image_channel_order == CL_RGB  ) || ( format->image_channel_order == CL_sRGB  ))
-        outData[0] = tempData[0];
-        outData[1] = tempData[1];
-        outData[2] = tempData[2];
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 2 ];
-    else if ((format->image_channel_order == CL_RGBx)
-             || (format->image_channel_order == CL_sRGBx))
+    else if(( format->image_channel_order == CL_RGBx  ) || ( format->image_channel_order == CL_sRGBx  ))
-        outData[0] = tempData[0];
-        outData[1] = tempData[1];
-        outData[2] = tempData[2];
-        outData[3] = 0;
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 2 ];
+        outData[ 3 ] = 0;
-    else if ((format->image_channel_order == CL_RGBA)
-             || (format->image_channel_order == CL_sRGBA))
+    else if(( format->image_channel_order == CL_RGBA ) || ( format->image_channel_order == CL_sRGBA ))
-        outData[0] = tempData[0];
-        outData[1] = tempData[1];
-        outData[2] = tempData[2];
-        outData[3] = tempData[3];
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 2 ];
+        outData[ 3 ] = tempData[ 3 ];
-    else if (format->image_channel_order == CL_ARGB)
+    else if( format->image_channel_order == CL_ARGB )
-        outData[0] = tempData[1];
-        outData[1] = tempData[2];
-        outData[2] = tempData[3];
-        outData[3] = tempData[0];
+        outData[ 0 ] = tempData[ 1 ];
+        outData[ 1 ] = tempData[ 2 ];
+        outData[ 2 ] = tempData[ 3 ];
+        outData[ 3 ] = tempData[ 0 ];
-    else if ((format->image_channel_order == CL_BGRA)
-             || (format->image_channel_order == CL_sBGRA))
+    else if(( format->image_channel_order == CL_BGRA ) || ( format->image_channel_order == CL_sBGRA ))
-        outData[0] = tempData[2];
-        outData[1] = tempData[1];
-        outData[2] = tempData[0];
-        outData[3] = tempData[3];
+        outData[ 0 ] = tempData[ 2 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 0 ];
+        outData[ 3 ] = tempData[ 3 ];
-    else if (format->image_channel_order == CL_INTENSITY)
+    else if( format->image_channel_order == CL_INTENSITY )
-        outData[0] = tempData[0];
-        outData[1] = tempData[0];
-        outData[2] = tempData[0];
-        outData[3] = tempData[0];
+        outData[ 1 ] = tempData[ 0 ];
+        outData[ 2 ] = tempData[ 0 ];
+        outData[ 3 ] = tempData[ 0 ];
-    else if (format->image_channel_order == CL_LUMINANCE)
+    else if( format->image_channel_order == CL_LUMINANCE )
-        outData[0] = tempData[0];
-        outData[1] = tempData[0];
-        outData[2] = tempData[0];
+        outData[ 1 ] = tempData[ 0 ];
+        outData[ 2 ] = tempData[ 0 ];
-    else if (format->image_channel_order == CL_DEPTH)
+    else if( format->image_channel_order == CL_DEPTH  )
-        outData[0] = tempData[0];
+        outData[ 0 ] = tempData[ 0 ];
 #ifdef CL_1RGB_APPLE
-    else if (format->image_channel_order == CL_1RGB_APPLE)
+    else if( format->image_channel_order == CL_1RGB_APPLE )
-        outData[0] = tempData[1];
-        outData[1] = tempData[2];
-        outData[2] = tempData[3];
-        outData[3] = 0xff;
+        outData[ 0 ] = tempData[ 1 ];
+        outData[ 1 ] = tempData[ 2 ];
+        outData[ 2 ] = tempData[ 3 ];
+        outData[ 3 ] = 0xff;
 #ifdef CL_BGR1_APPLE
-    else if (format->image_channel_order == CL_BGR1_APPLE)
+    else if( format->image_channel_order == CL_BGR1_APPLE )
-        outData[0] = tempData[2];
-        outData[1] = tempData[1];
-        outData[2] = tempData[0];
-        outData[3] = 0xff;
+        outData[ 0 ] = tempData[ 2 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 0 ];
+        outData[ 3 ] = 0xff;
@@ -529,32 +495,27 @@
-template <class T>
-void read_image_pixel(void *imageData, image_descriptor *imageInfo, int x,
-                      int y, int z, T *outData)
+template <class T> void read_image_pixel( void *imageData, image_descriptor *imageInfo,
+                                         int x, int y, int z, T *outData )
-    read_image_pixel<T>(imageData, imageInfo, x, y, z, outData, 0);
+  read_image_pixel<T>( imageData, imageInfo, x, y, z, outData, 0);
 // Stupid template rules
-bool get_integer_coords(float x, float y, float z, size_t width, size_t height,
-                        size_t depth, image_sampler_data *imageSampler,
-                        image_descriptor *imageInfo, int &outX, int &outY,
-                        int &outZ);
-bool get_integer_coords_offset(float x, float y, float z, float xAddressOffset,
-                               float yAddressOffset, float zAddressOffset,
+bool get_integer_coords( float x, float y, float z,
+                        size_t width, size_t height, size_t depth,
+                        image_sampler_data *imageSampler, image_descriptor *imageInfo,
+                        int &outX, int &outY, int &outZ );
+bool get_integer_coords_offset( float x, float y, float z,
+                               float xAddressOffset, float yAddressOffset, float zAddressOffset,
                                size_t width, size_t height, size_t depth,
-                               image_sampler_data *imageSampler,
-                               image_descriptor *imageInfo, int &outX,
-                               int &outY, int &outZ);
+                               image_sampler_data *imageSampler, image_descriptor *imageInfo,
+                               int &outX, int &outY, int &outZ );
-template <class T>
-void sample_image_pixel_offset(void *imageData, image_descriptor *imageInfo,
-                               float x, float y, float z, float xAddressOffset,
-                               float yAddressOffset, float zAddressOffset,
-                               image_sampler_data *imageSampler, T *outData,
-                               int lod)
+template <class T> void sample_image_pixel_offset( void *imageData, image_descriptor *imageInfo,
+                                                  float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                                  image_sampler_data *imageSampler, T *outData, int lod )
     int iX = 0, iY = 0, iZ = 0;
@@ -562,8 +523,7 @@
     float max_h;
     float max_d;
-    switch (imageInfo->type)
-    {
+    switch (imageInfo->type) {
             max_h = imageInfo->arraySize;
             max_d = 0;
@@ -578,155 +538,120 @@
-    if (/*gTestMipmaps*/ imageInfo->num_mip_levels > 1)
+    if( /*gTestMipmaps*/ imageInfo->num_mip_levels > 1 )
-        switch (imageInfo->type)
-        {
+        switch (imageInfo->type) {
             case CL_MEM_OBJECT_IMAGE3D:
-                max_d = (float)((imageInfo->depth >> lod)
-                                    ? (imageInfo->depth >> lod)
-                                    : 1);
+                max_d = (float)((imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1);
             case CL_MEM_OBJECT_IMAGE2D:
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                max_h = (float)((imageInfo->height >> lod)
-                                    ? (imageInfo->height >> lod)
-                                    : 1);
+                max_h = (float)((imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1);
-            default:;
+            default:
+                ;
-        max_w =
-            (float)((imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1);
+        max_w = (float)((imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1);
-    get_integer_coords_offset(x, y, z, xAddressOffset, yAddressOffset,
-                              zAddressOffset, max_w, max_h, max_d, imageSampler,
-                              imageInfo, iX, iY, iZ);
+    get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, max_w, max_h, max_d, imageSampler, imageInfo, iX, iY, iZ );
-    read_image_pixel<T>(imageData, imageInfo, iX, iY, iZ, outData, lod);
+    read_image_pixel<T>( imageData, imageInfo, iX, iY, iZ, outData, lod );
-template <class T>
-void sample_image_pixel_offset(void *imageData, image_descriptor *imageInfo,
-                               float x, float y, float z, float xAddressOffset,
-                               float yAddressOffset, float zAddressOffset,
-                               image_sampler_data *imageSampler, T *outData)
+template <class T> void sample_image_pixel_offset( void *imageData, image_descriptor *imageInfo,
+                                                  float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                                  image_sampler_data *imageSampler, T *outData)
-    sample_image_pixel_offset<T>(imageData, imageInfo, x, y, z, xAddressOffset,
-                                 yAddressOffset, zAddressOffset, imageSampler,
-                                 outData, 0);
+  sample_image_pixel_offset<T>( imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset, zAddressOffset,
+    imageSampler,  outData, 0);
-template <class T>
-void sample_image_pixel(void *imageData, image_descriptor *imageInfo, float x,
-                        float y, float z, image_sampler_data *imageSampler,
-                        T *outData)
+template <class T> void sample_image_pixel( void *imageData, image_descriptor *imageInfo,
+                                           float x, float y, float z, image_sampler_data *imageSampler, T *outData )
-    return sample_image_pixel_offset<T>(imageData, imageInfo, x, y, z, 0.0f,
-                                        0.0f, 0.0f, imageSampler, outData);
+    return sample_image_pixel_offset<T>(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData);
-sample_image_pixel_float(void *imageData, image_descriptor *imageInfo, float x,
-                         float y, float z, image_sampler_data *imageSampler,
-                         float *outData, int verbose, int *containsDenorms);
+FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                    float x, float y, float z, image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms );
-FloatPixel sample_image_pixel_float(void *imageData,
-                                    image_descriptor *imageInfo, float x,
-                                    float y, float z,
-                                    image_sampler_data *imageSampler,
-                                    float *outData, int verbose,
-                                    int *containsDenorms, int lod);
+FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                    float x, float y, float z, image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms, int lod );
-FloatPixel sample_image_pixel_float_offset(
-    void *imageData, image_descriptor *imageInfo, float x, float y, float z,
-    float xAddressOffset, float yAddressOffset, float zAddressOffset,
-    image_sampler_data *imageSampler, float *outData, int verbose,
-    int *containsDenorms);
-FloatPixel sample_image_pixel_float_offset(
-    void *imageData, image_descriptor *imageInfo, float x, float y, float z,
-    float xAddressOffset, float yAddressOffset, float zAddressOffset,
-    image_sampler_data *imageSampler, float *outData, int verbose,
-    int *containsDenorms, int lod);
+FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
+                                           float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                           image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms );
+FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
+                                           float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                           image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms, int lod );
-extern void pack_image_pixel(unsigned int *srcVector,
-                             const cl_image_format *imageFormat, void *outData);
-extern void pack_image_pixel(int *srcVector, const cl_image_format *imageFormat,
-                             void *outData);
-extern void pack_image_pixel(float *srcVector,
-                             const cl_image_format *imageFormat, void *outData);
-extern void pack_image_pixel_error(const float *srcVector,
-                                   const cl_image_format *imageFormat,
-                                   const void *results, float *errors);
+extern void pack_image_pixel( unsigned int *srcVector, const cl_image_format *imageFormat, void *outData );
+extern void pack_image_pixel( int *srcVector, const cl_image_format *imageFormat, void *outData );
+extern void pack_image_pixel( float *srcVector, const cl_image_format *imageFormat, void *outData );
+extern void pack_image_pixel_error( const float *srcVector, const cl_image_format *imageFormat, const void *results,  float *errors );
-extern char *create_random_image_data(ExplicitType dataType,
-                                      image_descriptor *imageInfo,
-                                      BufferOwningPtr<char> &P, MTdata d,
-                                      bool image2DFromBuffer = false);
+extern char *create_random_image_data( ExplicitType dataType, image_descriptor *imageInfo, BufferOwningPtr<char> &P, MTdata d, bool image2DFromBuffer = false );
 // deprecated
-// extern bool clamp_image_coord( image_sampler_data *imageSampler, float value,
-// size_t max, int &outValue );
+//extern bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t max, int &outValue );
-extern void get_sampler_kernel_code(image_sampler_data *imageSampler,
-                                    char *outLine);
-extern float get_max_absolute_error(const cl_image_format *format,
-                                    image_sampler_data *sampler);
-extern float get_max_relative_error(const cl_image_format *format,
-                                    image_sampler_data *sampler, int is3D,
-                                    int isLinearFilter);
+extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine );
+extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler);
+extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter );
-#define errMax(_x, _y) ((_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y))
+#define errMax( _x , _y )       ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) )
-static inline cl_uint abs_diff_uint(cl_uint x, cl_uint y)
+static inline cl_uint abs_diff_uint( cl_uint x, cl_uint y )
     return y > x ? y - x : x - y;
-static inline cl_uint abs_diff_int(cl_int x, cl_int y)
+static inline cl_uint abs_diff_int( cl_int x, cl_int y )
-    return (cl_uint)(y > x ? y - x : x - y);
+    return (cl_uint) (y > x ? y - x : x - y);
-static inline cl_float relative_error(float test, float expected)
+static inline cl_float relative_error( float test, float expected )
     // 0-0/0 is 0 in this case, not NaN
-    if (test == 0.0f && expected == 0.0f) return 0.0f;
+    if( test == 0.0f && expected == 0.0f )
+        return 0.0f;
     return (test - expected) / expected;
 extern float random_float(float low, float high);
-class CoordWalker {
+class CoordWalker
-    CoordWalker(void *coords, bool useFloats, size_t vecSize);
+    CoordWalker( void * coords, bool useFloats, size_t vecSize );
-    cl_float Get(size_t idx, size_t el);
+    cl_float    Get( size_t idx, size_t el );
-    cl_float *mFloatCoords;
-    cl_int *mIntCoords;
-    size_t mVecSize;
+    cl_float * mFloatCoords;
+    cl_int * mIntCoords;
+    size_t    mVecSize;
-extern cl_half convert_float_to_half(float f);
-extern int DetectFloatToHalfRoundingMode(
-    cl_command_queue); // Returns CL_SUCCESS on success
+extern int  DetectFloatToHalfRoundingMode( cl_command_queue );  // Returns CL_SUCCESS on success
 // sign bit: don't care, exponent: maximum value, significand: non-zero
-static int inline is_half_nan(cl_half half) { return (half & 0x7fff) > 0x7c00; }
+static int inline is_half_nan( cl_ushort half ){ return ( half & 0x7fff ) > 0x7c00; }
 // sign bit: don't care, exponent: zero, significand: non-zero
-static int inline is_half_denorm(cl_half half) { return IsHalfSubnormal(half); }
+static int inline is_half_denorm( cl_ushort half ){ return IsHalfSubnormal( half ); }
 // sign bit: don't care, exponent: zero, significand: zero
-static int inline is_half_zero(cl_half half) { return (half & 0x7fff) == 0; }
+static int inline is_half_zero( cl_ushort half ){ return ( half & 0x7fff ) == 0; }
+cl_ushort convert_float_to_half( cl_float f );
+cl_float  convert_half_to_float( cl_ushort h );
 extern double sRGBmap(float fc);
-extern const char *convert_image_type_to_string(cl_mem_object_type imageType);
 #endif // _imageHelpers_h
diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp
index 95b9555..1f76f36 100644
--- a/test_common/harness/kernelHelpers.cpp
+++ b/test_common/harness/kernelHelpers.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -28,8 +28,6 @@
 #include <fstream>
 #include <sstream>
 #include <iomanip>
-#include <mutex>
-#include <algorithm>
 #if defined(_WIN32)
 std::string slash = "\\";
@@ -37,23 +35,20 @@
 std::string slash = "/";
-static std::mutex gCompilerMutex;
-static cl_int get_first_device_id(const cl_context context,
-                                  cl_device_id &device);
+static cl_int get_first_device_id(const cl_context context, cl_device_id &device);
 long get_file_size(const std::string &fileName)
     std::ifstream ifs(fileName.c_str(), std::ios::binary);
-    if (!ifs.good()) return 0;
+    if (!ifs.good())
+        return 0;
     // get length of file:
     ifs.seekg(0, std::ios::end);
     std::ios::pos_type length = ifs.tellg();
     return static_cast<long>(length);
-static std::string get_kernel_content(unsigned int numKernelLines,
-                                      const char *const *kernelProgram)
+static std::string get_kernel_content(unsigned int numKernelLines, const char *const *kernelProgram)
     std::string kernel;
     for (size_t i = 0; i < numKernelLines; ++i)
@@ -77,11 +72,11 @@
         if (pos >= 2 && source[pos - 1] == '_' && source[pos - 2] == '_')
             pos -= 2;
-        // check character before 'kernel' (white space expected)
+        //check character before 'kernel' (white space expected)
         size_t wsPos = source.find_last_of(" \t\r\n", pos);
         if (wsPos == std::string::npos || wsPos + 1 == pos)
-            // check character after 'kernel' (white space expected)
+            //check character after 'kernel' (white space expected)
             size_t akPos = kPos + sizeof("kernel") - 1;
             wsPos = source.find_first_of(" \t\r\n", akPos);
             if (!(wsPos == akPos))
@@ -96,19 +91,21 @@
                 attributeFound = false;
                 // find '(' after kernel name name
                 size_t pPos = source.find("(", akPos);
-                if (!(pPos != std::string::npos)) continue;
+                if (!(pPos != std::string::npos))
+                    continue;
                 // check for not empty kernel name before '('
                 pos = source.find_last_not_of(" \t\r\n", pPos - 1);
-                if (!(pos != std::string::npos && pos > akPos)) continue;
+                if (!(pos != std::string::npos && pos > akPos))
+                    continue;
-                // find character before kernel name
+                //find character before kernel name
                 wsPos = source.find_last_of(" \t\r\n", pos);
-                if (!(wsPos != std::string::npos && wsPos >= akPos)) continue;
+                if (!(wsPos != std::string::npos && wsPos >= akPos))
+                    continue;
-                std::string name =
-                    source.substr(wsPos + 1, pos + 1 - (wsPos + 1));
-                // check for kernel attribute
+                std::string name = source.substr(wsPos + 1, pos + 1 - (wsPos + 1));
+                //check for kernel attribute
                 if (name == "__attribute__")
                     attributeFound = true;
@@ -148,17 +145,20 @@
     return oss.str();
-static std::string
-get_offline_compilation_file_type_str(const CompilationMode compilationMode)
+static std::string get_offline_compilation_file_type_str(const CompilationMode compilationMode)
     switch (compilationMode)
-        default: assert(0 && "Invalid compilation mode"); abort();
+        default:
+            assert(0 && "Invalid compilation mode");
+            abort();
         case kOnline:
             assert(0 && "Invalid compilation mode for offline compilation");
-        case kBinary: return "binary";
-        case kSpir_v: return "SPIR-V";
+        case kBinary:
+            return "binary";
+        case kSpir_v:
+            return "SPIR-V";
@@ -170,40 +170,34 @@
     std::string kernelName = get_kernel_name(kernel);
     cl_uint kernelCrc = crc32(, kernel.size());
     std::ostringstream oss;
-    oss << kernelName << std::hex << std::setfill('0') << std::setw(8)
-        << kernelCrc;
-    if (buildOptions)
-    {
+    oss << kernelName <<  std::hex << std::setfill('0') << std::setw(8) << kernelCrc;
+    if(buildOptions) {
         cl_uint bOptionsCrc = crc32(buildOptions, strlen(buildOptions));
-        oss << '.' << std::hex << std::setfill('0') << std::setw(8)
-            << bOptionsCrc;
+        oss << '.' << std::hex << std::setfill('0') << std::setw(8) << bOptionsCrc;
     return oss.str();
-static std::string
-get_cl_build_options_filename_with_path(const std::string &filePath,
-                                        const std::string &fileNamePrefix)
+static std::string 
+get_cl_build_options_filename_with_path(const std::string& filePath,
+                                        const std::string& fileNamePrefix) {
     return filePath + slash + fileNamePrefix + ".options";
-static std::string
-get_cl_source_filename_with_path(const std::string &filePath,
-                                 const std::string &fileNamePrefix)
+static std::string 
+get_cl_source_filename_with_path(const std::string& filePath,
+                                 const std::string& fileNamePrefix) {
     return filePath + slash + fileNamePrefix + ".cl";
-static std::string
-get_binary_filename_with_path(CompilationMode mode, cl_uint deviceAddrSpaceSize,
-                              const std::string &filePath,
-                              const std::string &fileNamePrefix)
+static std::string 
+get_binary_filename_with_path(CompilationMode mode,
+                              cl_uint deviceAddrSpaceSize,
+                              const std::string& filePath,
+                              const std::string& fileNamePrefix) {
     std::string binaryFilename = filePath + slash + fileNamePrefix;
-    if (kSpir_v == mode)
-    {
+    if(kSpir_v == mode) {
         std::ostringstream extension;
         extension << ".spv" << deviceAddrSpaceSize;
         binaryFilename += extension.str();
@@ -211,43 +205,39 @@
     return binaryFilename;
-static bool file_exist_on_disk(const std::string &filePath,
-                               const std::string &fileName)
+static bool file_exist_on_disk(const std::string& filePath,
+                               const std::string& fileName) {
     std::string fileNameWithPath = filePath + slash + fileName;
     bool exist = false;
     std::ifstream ifs;, std::ios::binary);
-    if (ifs.good()) exist = true;
+    if(ifs.good())
+        exist = true;
     return exist;
 static bool should_save_kernel_source_to_disk(CompilationMode mode,
                                               CompilationCacheMode cacheMode,
-                                              const std::string &binaryPath,
-                                              const std::string &binaryName)
+                                              const std::string& binaryPath, 
+                                              const std::string& binaryName)
     bool saveToDisk = false;
-    if (cacheMode == kCacheModeDumpCl
-        || (cacheMode == kCacheModeOverwrite && mode != kOnline))
-    {
+    if(cacheMode == kCacheModeDumpCl ||
+       (cacheMode == kCacheModeOverwrite && mode != kOnline)) {
         saveToDisk = true;
-    if (cacheMode == kCacheModeCompileIfAbsent && mode != kOnline)
-    {
+    if(cacheMode == kCacheModeCompileIfAbsent && mode != kOnline) {
         saveToDisk = !file_exist_on_disk(binaryPath, binaryName);
     return saveToDisk;
-static int save_kernel_build_options_to_disk(const std::string &path,
-                                             const std::string &prefix,
-                                             const char *buildOptions)
-    std::string filename =
-        get_cl_build_options_filename_with_path(path, prefix);
+static int save_kernel_build_options_to_disk(const std::string& path,
+                                             const std::string& prefix,
+                                             const char *buildOptions) {
+    std::string filename = get_cl_build_options_filename_with_path(path, prefix);
     std::ofstream ofs(filename.c_str(), std::ios::binary);
     if (!ofs.good())
@@ -260,10 +250,9 @@
     return CL_SUCCESS;
-static int save_kernel_source_to_disk(const std::string &path,
-                                      const std::string &prefix,
-                                      const std::string &source)
+static int save_kernel_source_to_disk(const std::string& path,
+                                      const std::string& prefix,
+                                      const std::string& source) {
     std::string filename = get_cl_source_filename_with_path(path, prefix);
     std::ofstream ofs(filename.c_str(), std::ios::binary);
     if (!ofs.good())
@@ -277,86 +266,123 @@
     return CL_SUCCESS;
-static int
-save_kernel_source_and_options_to_disk(unsigned int numKernelLines,
-                                       const char *const *kernelProgram,
-                                       const char *buildOptions)
+static int save_kernel_source_and_options_to_disk(unsigned int numKernelLines,
+                                                  const char *const *kernelProgram,
+                                                  const char *buildOptions)
     int error;
     std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
-    std::string kernelNamePrefix =
-        get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
+    std::string kernelNamePrefix = get_unique_filename_prefix(numKernelLines,
+                                                             kernelProgram,
+                                                             buildOptions);
     // save kernel source to disk
-    error = save_kernel_source_to_disk(gCompilationCachePath, kernelNamePrefix,
-                                       kernel);
+    error = save_kernel_source_to_disk(gCompilationCachePath, kernelNamePrefix, kernel);
     // save kernel build options to disk if exists
     if (buildOptions != NULL)
-        error |= save_kernel_build_options_to_disk(
-            gCompilationCachePath, kernelNamePrefix, buildOptions);
+        error |= save_kernel_build_options_to_disk(gCompilationCachePath, kernelNamePrefix, buildOptions);
     return error;
-static std::string
-get_compilation_mode_str(const CompilationMode compilationMode)
+static std::string get_compilation_mode_str(const CompilationMode compilationMode)
     switch (compilationMode)
-        default: assert(0 && "Invalid compilation mode"); abort();
-        case kOnline: return "online";
-        case kBinary: return "binary";
-        case kSpir_v: return "spir-v";
+        default:
+            assert(0 && "Invalid compilation mode");
+            abort();
+        case kOnline:
+            return "online";
+        case kBinary:
+            return "binary";
+        case kSpir_v:
+            return "spir-v";
-static cl_int get_cl_device_info_str(const cl_device_id device,
-                                     const cl_uint device_address_space_size,
-                                     const CompilationMode compilationMode,
-                                     std::string &clDeviceInfo)
+static std::string get_khronos_compiler_command(const cl_uint device_address_space_size,
+                                                const bool openclCXX,
+                                                const std::string &bOptions,
+                                                const std::string &sourceFilename,
+                                                const std::string &outputFilename)
+    // Set compiler options
+    // Emit SPIR-V
+    std::string compilerOptions = " -cc1 -emit-spirv";
+    // <triple>: for 32 bit SPIR-V use spir-unknown-unknown, for 64 bit SPIR-V use spir64-unknown-unknown.
+    if(device_address_space_size == 32)
+    {
+        compilerOptions += " -triple=spir-unknown-unknown";
+    }
+    else
+    {
+        compilerOptions += " -triple=spir64-unknown-unknown";
+    }
+    // Set OpenCL C++ flag required by SPIR-V-ready clang (compiler provided by Khronos)
+    if(openclCXX)
+    {
+        compilerOptions = compilerOptions + " -cl-std=c++";
+    }
+    // Set correct includes
+    if(openclCXX)
+    {
+        compilerOptions += " -I ";
+        compilerOptions += STRINGIFY_VALUE(CL_LIBCLCXX_DIR);
+    }
+    else
+    {
+        compilerOptions += " -include opencl.h";
+    }
+    // Add build options passed to this function
+    compilerOptions += " " + bOptions;
+    compilerOptions +=
+        " " + sourceFilename +
+        " -o " + outputFilename;
+    std::string runString = STRINGIFY_VALUE(KHRONOS_OFFLINE_COMPILER) + compilerOptions;
+    return runString;
+static cl_int get_cl_device_info_str(const cl_device_id device, const cl_uint device_address_space_size,
+                                     const CompilationMode compilationMode, std::string &clDeviceInfo)
     std::string extensionsString = get_device_extensions_string(device);
     std::string versionString = get_device_version_string(device);
     std::ostringstream clDeviceInfoStream;
-    std::string file_type =
-        get_offline_compilation_file_type_str(compilationMode);
-    clDeviceInfoStream << "# OpenCL device info affecting " << file_type
-                       << " offline compilation:" << std::endl
-                       << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size
-                       << std::endl
-                       << "CL_DEVICE_EXTENSIONS=\"" << extensionsString << "\""
-                       << std::endl;
+    std::string file_type = get_offline_compilation_file_type_str(compilationMode);
+    clDeviceInfoStream << "# OpenCL device info affecting " << file_type << " offline compilation:" << std::endl
+                    << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size << std::endl
+                    << "CL_DEVICE_EXTENSIONS=\"" << extensionsString << "\"" << std::endl;
     /* We only need the device's supported IL version(s) when compiling IL
-     * that will be loaded with clCreateProgramWithIL() */
+    * that will be loaded with clCreateProgramWithIL() */
     if (compilationMode == kSpir_v)
         std::string ilVersionString = get_device_il_version_string(device);
-        clDeviceInfoStream << "CL_DEVICE_IL_VERSION=\"" << ilVersionString
-                           << "\"" << std::endl;
+        clDeviceInfoStream << "CL_DEVICE_IL_VERSION=\"" << ilVersionString << "\"" << std::endl;
-    clDeviceInfoStream << "CL_DEVICE_VERSION=\"" << versionString << "\""
-                       << std::endl;
-    clDeviceInfoStream << "CL_DEVICE_IMAGE_SUPPORT="
-                       << (0 == checkForImageSupport(device)) << std::endl;
-    clDeviceInfoStream << "CL_DEVICE_NAME=\"" << get_device_name(device).c_str()
-                       << "\"" << std::endl;
+    clDeviceInfoStream << "CL_DEVICE_VERSION=\"" << versionString << "\"" << std::endl;
     clDeviceInfo = clDeviceInfoStream.str();
     return CL_SUCCESS;
-static int write_cl_device_info(const cl_device_id device,
-                                const cl_uint device_address_space_size,
-                                const CompilationMode compilationMode,
-                                std::string &clDeviceInfoFilename)
+static int write_cl_device_info(const cl_device_id device, const cl_uint device_address_space_size,
+                                const CompilationMode compilationMode, std::string &clDeviceInfoFilename)
     std::string clDeviceInfo;
-    int error = get_cl_device_info_str(device, device_address_space_size,
-                                       compilationMode, clDeviceInfo);
+    int error = get_cl_device_info_str(device, device_address_space_size, compilationMode, clDeviceInfo);
     if (error != CL_SUCCESS)
         return error;
@@ -365,30 +391,25 @@
     cl_uint crc = crc32(, clDeviceInfo.size());
     /* Get the filename for the clDeviceInfo file.
-     * Note: the file includes the hash on its content, so it is usually
-     * unnecessary to delete it. */
+     * Note: the file includes the hash on its content, so it is usually unnecessary to delete it. */
     std::ostringstream clDeviceInfoFilenameStream;
-    clDeviceInfoFilenameStream << gCompilationCachePath << slash
-                               << "clDeviceInfo-";
-    clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8)
-                               << crc << ".txt";
+    clDeviceInfoFilenameStream << gCompilationCachePath << slash << "clDeviceInfo-";
+    clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8) << crc << ".txt";
     clDeviceInfoFilename = clDeviceInfoFilenameStream.str();
-    if ((size_t)get_file_size(clDeviceInfoFilename) == clDeviceInfo.size())
+    if ((size_t) get_file_size(clDeviceInfoFilename) == clDeviceInfo.size())
         /* The CL device info file has already been created.
          * Nothing to do. */
         return 0;
-    /* The file does not exist or its length is not as expected.
-     * Create/overwrite it. */
+    /* The file does not exist or its length is not as expected.  Create/overwrite it. */
     std::ofstream ofs(clDeviceInfoFilename);
     if (!ofs.good())
-        log_info("OfflineCompiler: can't create CL device info file: %s\n",
-                 clDeviceInfoFilename.c_str());
+        log_info("OfflineCompiler: can't create CL device info file: %s\n", clDeviceInfoFilename.c_str());
         return -1;
     ofs << clDeviceInfo;
@@ -397,11 +418,12 @@
     return CL_SUCCESS;
-static std::string get_offline_compilation_command(
-    const cl_uint device_address_space_size,
-    const CompilationMode compilationMode, const std::string &bOptions,
-    const std::string &sourceFilename, const std::string &outputFilename,
-    const std::string &clDeviceInfoFilename)
+static std::string get_offline_compilation_command(const cl_uint device_address_space_size,
+                                                   const CompilationMode compilationMode,
+                                                   const std::string &bOptions,
+                                                   const std::string &sourceFilename,
+                                                   const std::string &outputFilename,
+                                                   const std::string &clDeviceInfoFilename)
     std::ostringstream wrapperOptions;
@@ -425,27 +447,45 @@
                                    const CompilationMode compilationMode,
                                    const std::string &bOptions,
                                    const std::string &sourceFilename,
-                                   const std::string &outputFilename)
+                                   const std::string &outputFilename,
+                                   const bool openclCXX)
     std::string runString;
-    std::string clDeviceInfoFilename;
-    // See cl_offline_compiler-interface.txt for a description of the
-    // format of the CL device information file generated below, and
-    // the internal command line interface for invoking the offline
-    // compiler.
-    cl_int err = write_cl_device_info(device, device_address_space_size,
-                                      compilationMode, clDeviceInfoFilename);
-    if (err != CL_SUCCESS)
+    if (openclCXX)
-        log_error("Failed writing CL device info file\n");
-        return err;
+        log_error("CL C++ compilation is not possible: KHRONOS_OFFLINE_COMPILER was not defined.\n");
+        return CL_INVALID_OPERATION;
+        if (compilationMode != kSpir_v)
+        {
+            log_error("Compilation mode must be SPIR-V for Khronos compiler");
+            return -1;
+        }
+        runString = get_khronos_compiler_command(device_address_space_size, openclCXX, bOptions,
+                                                 sourceFilename, outputFilename);
+    else
+    {
+        std::string clDeviceInfoFilename;
-    runString = get_offline_compilation_command(
-        device_address_space_size, compilationMode, bOptions, sourceFilename,
-        outputFilename, clDeviceInfoFilename);
+        // See cl_offline_compiler-interface.txt for a description of the
+        // format of the CL device information file generated below, and
+        // the internal command line interface for invoking the offline
+        // compiler.
+        cl_int err = write_cl_device_info(device, device_address_space_size, compilationMode,
+                                          clDeviceInfoFilename);
+        if (err != CL_SUCCESS)
+        {
+            log_error("Failed writing CL device info file\n");
+            return err;
+        }
+        runString = get_offline_compilation_command(device_address_space_size, compilationMode, bOptions,
+                                                    sourceFilename, outputFilename, clDeviceInfoFilename);
+    }
     // execute script
     log_info("Executing command: %s\n", runString.c_str());
@@ -460,12 +500,10 @@
     return CL_SUCCESS;
-static cl_int get_first_device_id(const cl_context context,
-                                  cl_device_id &device)
+static cl_int get_first_device_id(const cl_context context, cl_device_id &device)
     cl_uint numDevices = 0;
-    cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
-                                    sizeof(cl_uint), &numDevices, NULL);
+    cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, NULL);
     test_error(error, "clGetContextInfo failed getting CL_CONTEXT_NUM_DEVICES");
     if (numDevices == 0)
@@ -475,60 +513,55 @@
     std::vector<cl_device_id> devices(numDevices, 0);
-    error =
-        clGetContextInfo(context, CL_CONTEXT_DEVICES,
-                         numDevices * sizeof(cl_device_id), &devices[0], NULL);
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, numDevices*sizeof(cl_device_id), &devices[0], NULL);
     test_error(error, "clGetContextInfo failed getting CL_CONTEXT_DEVICES");
     device = devices[0];
     return CL_SUCCESS;
-static cl_int get_device_address_bits(const cl_device_id device,
-                                      cl_uint &device_address_space_size)
+static cl_int get_device_address_bits(const cl_device_id device, cl_uint &device_address_space_size)
-    cl_int error =
-        clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint),
-                        &device_address_space_size, NULL);
+    cl_int error = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint), &device_address_space_size, NULL);
     test_error(error, "Unable to obtain device address bits");
     if (device_address_space_size != 32 && device_address_space_size != 64)
-        log_error("ERROR: Unexpected number of device address bits: %u\n",
-                  device_address_space_size);
+        log_error("ERROR: Unexpected number of device address bits: %u\n", device_address_space_size);
         return -1;
     return CL_SUCCESS;
-static int get_offline_compiler_output(
-    std::ifstream &ifs, const cl_device_id device, cl_uint deviceAddrSpaceSize,
-    const CompilationMode compilationMode, const std::string &bOptions,
-    const std::string &kernelPath, const std::string &kernelNamePrefix)
+static int get_offline_compiler_output(std::ifstream &ifs,
+                                       const cl_device_id device,
+                                       cl_uint deviceAddrSpaceSize,
+                                       const bool openclCXX,
+                                       const CompilationMode compilationMode,
+                                       const std::string &bOptions,
+                                       const std::string &kernelPath,
+                                       const std::string &kernelNamePrefix)
-    std::string sourceFilename =
-        get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
-    std::string outputFilename = get_binary_filename_with_path(
-        compilationMode, deviceAddrSpaceSize, kernelPath, kernelNamePrefix);
+    std::string sourceFilename = get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
+    std::string outputFilename = get_binary_filename_with_path(compilationMode,
+                                                               deviceAddrSpaceSize,
+                                                               kernelPath,
+                                                               kernelNamePrefix);, std::ios::binary);
-    if (!ifs.good())
-    {
-        std::string file_type =
-            get_offline_compilation_file_type_str(compilationMode);
-        if (gCompilationCacheMode == kCacheModeForceRead)
-        {
+    if(!ifs.good()) {
+       std::string file_type = get_offline_compilation_file_type_str(compilationMode);
+        if (gCompilationCacheMode == kCacheModeForceRead) {
             log_info("OfflineCompiler: can't open cached %s file: %s\n",
                      file_type.c_str(), outputFilename.c_str());
             return -1;
-        else
-        {
-            int error = invoke_offline_compiler(device, deviceAddrSpaceSize,
-                                                compilationMode, bOptions,
-                                                sourceFilename, outputFilename);
-            if (error != CL_SUCCESS) return error;
+        else {
+            int error = invoke_offline_compiler(device, deviceAddrSpaceSize, compilationMode,
+                                                bOptions, sourceFilename, outputFilename, openclCXX);
+            if (error != CL_SUCCESS)
+                return error;
             // read output file
   , std::ios::binary);
@@ -538,18 +571,21 @@
                          file_type.c_str(), outputFilename.c_str());
                 return -1;
-        }
+       }
     return CL_SUCCESS;
-static int create_single_kernel_helper_create_program_offline(
-    cl_context context, cl_device_id device, cl_program *outProgram,
-    unsigned int numKernelLines, const char *const *kernelProgram,
-    const char *buildOptions, CompilationMode compilationMode)
+static int create_single_kernel_helper_create_program_offline(cl_context context,
+                                                              cl_device_id device,
+                                                              cl_program *outProgram,
+                                                              unsigned int numKernelLines,
+                                                              const char *const *kernelProgram,
+                                                              const char *buildOptions,
+                                                              const bool openclCXX,
+                                                              CompilationMode compilationMode)
-    if (kCacheModeDumpCl == gCompilationCacheMode)
-    {
+    if(kCacheModeDumpCl == gCompilationCacheMode) {
         return -1;
@@ -562,27 +598,40 @@
         test_error(error, "Failed to get device ID for first device");
     error = get_device_address_bits(device, device_address_space_size);
-    if (error != CL_SUCCESS) return error;
+    if (error != CL_SUCCESS)
+        return error;
     // set build options
     std::string bOptions;
     bOptions += buildOptions ? std::string(buildOptions) : "";
-    std::string kernelName =
-        get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
+    std::string kernelName = get_unique_filename_prefix(numKernelLines,
+                                                        kernelProgram,
+                                                        buildOptions);
     std::ifstream ifs;
-    error = get_offline_compiler_output(ifs, device, device_address_space_size,
-                                        compilationMode, bOptions,
-                                        gCompilationCachePath, kernelName);
-    if (error != CL_SUCCESS) return error;
+    error = get_offline_compiler_output(ifs, device, device_address_space_size, openclCXX, compilationMode, bOptions, gCompilationCachePath, kernelName);
+    if (error != CL_SUCCESS)
+      return error;
+    // -----------------------------------------------------------------------------------
+    // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+    // -----------------------------------------------------------------------------------
+    // Only OpenCL C++ to SPIR-V compilation
+    #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    if(openclCXX)
+    {
+        return CL_SUCCESS;
+    }
+    #endif
     ifs.seekg(0, ifs.end);
     int length = ifs.tellg();
     ifs.seekg(0, ifs.beg);
-    // treat modifiedProgram as input for clCreateProgramWithBinary
+    //treat modifiedProgram as input for clCreateProgramWithBinary
     if (compilationMode == kBinary)
         // read binary from file:
@@ -593,17 +642,15 @@
         size_t lengths = modifiedKernelBuf.size();
         const unsigned char *binaries = { &modifiedKernelBuf[0] };
-        log_info("offlineCompiler: clCreateProgramWithSource replaced with "
-                 "clCreateProgramWithBinary\n");
-        *outProgram = clCreateProgramWithBinary(context, 1, &device, &lengths,
-                                                &binaries, NULL, &error);
+        log_info("offlineCompiler: clCreateProgramWithSource replaced with clCreateProgramWithBinary\n");
+        *outProgram = clCreateProgramWithBinary(context, 1, &device, &lengths, &binaries, NULL, &error);
         if (*outProgram == NULL || error != CL_SUCCESS)
             print_error(error, "clCreateProgramWithBinary failed");
             return error;
-    // treat modifiedProgram as input for clCreateProgramWithIL
+    //treat modifiedProgram as input for clCreateProgramWithIL
     else if (compilationMode == kSpir_v)
         // read spir-v from file:
@@ -613,44 +660,12 @@
         size_t length = modifiedKernelBuf.size();
-        log_info("offlineCompiler: clCreateProgramWithSource replaced with "
-                 "clCreateProgramWithIL\n");
-        if (gCoreILProgram)
-        {
-            *outProgram = clCreateProgramWithIL(context, &modifiedKernelBuf[0],
-                                                length, &error);
-        }
-        else
-        {
-            cl_platform_id platform;
-            error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
-                                    sizeof(cl_platform_id), &platform, NULL);
-            test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
+        log_info("offlineCompiler: clCreateProgramWithSource replaced with clCreateProgramWithIL\n");
-            clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
-            clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
-                clGetExtensionFunctionAddressForPlatform(
-                    platform, "clCreateProgramWithILKHR");
-            if (clCreateProgramWithILKHR == NULL)
-            {
-                log_error(
-                    "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
-                return -1;
-            }
-            *outProgram = clCreateProgramWithILKHR(
-                context, &modifiedKernelBuf[0], length, &error);
-        }
+        *outProgram = clCreateProgramWithIL(context, &modifiedKernelBuf[0], length, &error);
         if (*outProgram == NULL || error != CL_SUCCESS)
-            if (gCoreILProgram)
-            {
-                print_error(error, "clCreateProgramWithIL failed");
-            }
-            else
-            {
-                print_error(error, "clCreateProgramWithILKHR failed");
-            }
+            print_error(error, "clCreateProgramWithIL failed");
             return error;
@@ -658,24 +673,26 @@
     return CL_SUCCESS;
-static int create_single_kernel_helper_create_program(
-    cl_context context, cl_device_id device, cl_program *outProgram,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *buildOptions, CompilationMode compilationMode)
+static int create_single_kernel_helper_create_program(cl_context context,
+                                                      cl_device_id device,
+                                                      cl_program *outProgram,
+                                                      unsigned int numKernelLines,
+                                                      const char **kernelProgram,
+                                                      const char *buildOptions,
+                                                      const bool openclCXX,
+                                                      CompilationMode compilationMode)
-    std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);
+    std::string filePrefix = get_unique_filename_prefix(numKernelLines,
+                                                        kernelProgram,
+                                                        buildOptions);
+    bool shouldSaveToDisk = should_save_kernel_source_to_disk(compilationMode, 
+                                                              gCompilationCacheMode,
+                                                              gCompilationCachePath,
+                                                              filePrefix);
-    std::string filePrefix =
-        get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
-    bool shouldSaveToDisk = should_save_kernel_source_to_disk(
-        compilationMode, gCompilationCacheMode, gCompilationCachePath,
-        filePrefix);
-    if (shouldSaveToDisk)
+    if(shouldSaveToDisk)
-        if (CL_SUCCESS
-            != save_kernel_source_and_options_to_disk(
-                numKernelLines, kernelProgram, buildOptions))
+        if(CL_SUCCESS != save_kernel_source_and_options_to_disk(numKernelLines, kernelProgram, buildOptions))
             log_error("Unable to dump kernel source to disk");
             return -1;
@@ -686,8 +703,7 @@
         int error = CL_SUCCESS;
         /* Create the program object from source */
-        *outProgram = clCreateProgramWithSource(context, numKernelLines,
-                                                kernelProgram, NULL, &error);
+        *outProgram = clCreateProgramWithSource(context, numKernelLines, kernelProgram, NULL, &error);
         if (*outProgram == NULL || error != CL_SUCCESS)
             print_error(error, "clCreateProgramWithSource failed");
@@ -697,9 +713,10 @@
-        return create_single_kernel_helper_create_program_offline(
-            context, device, outProgram, numKernelLines, kernelProgram,
-            buildOptions, compilationMode);
+        return create_single_kernel_helper_create_program_offline(context, device, outProgram,
+                                                                  numKernelLines, kernelProgram,
+                                                                  buildOptions, openclCXX,
+                                                                  compilationMode);
@@ -707,111 +724,139 @@
                                                cl_program *outProgram,
                                                unsigned int numKernelLines,
                                                const char **kernelProgram,
-                                               const char *buildOptions)
+                                               const char *buildOptions,
+                                               const bool openclCXX)
-    return create_single_kernel_helper_create_program(
-        context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions,
-        gCompilationMode);
+    return create_single_kernel_helper_create_program(context, NULL, outProgram,
+                                                      numKernelLines, kernelProgram,
+                                                      buildOptions, openclCXX,
+                                                      gCompilationMode);
-int create_single_kernel_helper_create_program_for_device(
-    cl_context context, cl_device_id device, cl_program *outProgram,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *buildOptions)
+int create_single_kernel_helper_create_program_for_device(cl_context context,
+                                                          cl_device_id device,
+                                                          cl_program *outProgram,
+                                                          unsigned int numKernelLines,
+                                                          const char **kernelProgram,
+                                                          const char *buildOptions,
+                                                          const bool openclCXX)
-    return create_single_kernel_helper_create_program(
-        context, device, outProgram, numKernelLines, kernelProgram,
-        buildOptions, gCompilationMode);
+    return create_single_kernel_helper_create_program(context, device, outProgram,
+                                                      numKernelLines, kernelProgram,
+                                                      buildOptions, openclCXX,
+                                                      gCompilationMode);
-int create_single_kernel_helper_with_build_options(
-    cl_context context, cl_program *outProgram, cl_kernel *outKernel,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *kernelName, const char *buildOptions)
+int create_single_kernel_helper_with_build_options(cl_context context,
+                                                   cl_program *outProgram,
+                                                   cl_kernel *outKernel,
+                                                   unsigned int numKernelLines,
+                                                   const char **kernelProgram,
+                                                   const char *kernelName,
+                                                   const char *buildOptions,
+                                                   const bool openclCXX)
-    return create_single_kernel_helper(context, outProgram, outKernel,
-                                       numKernelLines, kernelProgram,
-                                       kernelName, buildOptions);
+    return create_single_kernel_helper(context, outProgram, outKernel, numKernelLines, kernelProgram, kernelName, buildOptions, openclCXX);
 // Creates and builds OpenCL C/C++ program, and creates a kernel
-int create_single_kernel_helper(cl_context context, cl_program *outProgram,
+int create_single_kernel_helper(cl_context context,
+                                cl_program *outProgram,
                                 cl_kernel *outKernel,
                                 unsigned int numKernelLines,
                                 const char **kernelProgram,
                                 const char *kernelName,
-                                const char *buildOptions)
+                                const char *buildOptions,
+                                const bool openclCXX)
-    // For the logic that automatically adds -cl-std it is much cleaner if the
-    // build options have RAII. This buffer will store the potentially updated
-    // build options, in which case buildOptions will point at the string owned
-    // by this buffer.
-    std::string build_options_internal{ buildOptions ? buildOptions : "" };
-    // Check the build options for the -cl-std option.
-    if (!buildOptions || !strstr(buildOptions, "-cl-std"))
+    int error;
+    // Create OpenCL C++ program
+    if(openclCXX)
-        // If the build option isn't present add it using the latest OpenCL-C
-        // version supported by the device. This allows calling code to force a
-        // particular CL C version if it is required, but also means that
-        // callers need not specify a version if they want to assume the most
-        // recent CL C.
-        auto version = get_max_OpenCL_C_for_context(context);
-        std::string cl_std{};
-        if (version >= Version(3, 0))
+    // -----------------------------------------------------------------------------------
+    // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+    // -----------------------------------------------------------------------------------
+    // Only OpenCL C++ to SPIR-V compilation
+    #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+        // Save global variable
+        bool tempgCompilationCacheMode = gCompilationCacheMode;
+        // Force OpenCL C++ -> SPIR-V compilation on every run
+        gCompilationCacheMode = kCacheModeOverwrite;
+    #endif
+        error = create_openclcpp_program(
+            context, outProgram, numKernelLines, kernelProgram, buildOptions
+        );
+        if (error != CL_SUCCESS)
-            cl_std = "-cl-std=CL3.0";
+            log_error("Create program failed: %d, line: %d\n", error, __LINE__);
+            return error;
-        else if (version >= Version(2, 0) && version < Version(3, 0))
-        {
-            cl_std = "-cl-std=CL2.0";
-        }
-        else
-        {
-            // If the -cl-std build option is not specified, the highest OpenCL
-            // C 1.x language version supported by each device is used when
-            // compiling the program for each device.
-            cl_std = "";
-        }
-        build_options_internal += ' ';
-        build_options_internal += cl_std;
-        buildOptions = build_options_internal.c_str();
-    }
-    int error = create_single_kernel_helper_create_program(
-        context, outProgram, numKernelLines, kernelProgram, buildOptions);
-    if (error != CL_SUCCESS)
-    {
-        log_error("Create program failed: %d, line: %d\n", error, __LINE__);
+    // -----------------------------------------------------------------------------------
+    // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+    // -----------------------------------------------------------------------------------
+    #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+        // Restore global variables
+        gCompilationCacheMode = tempgCompilationCacheMode;
+        log_info("WARNING: KERNEL %s WAS ONLY COMPILED TO SPIR-V\n", kernelName);
         return error;
+    #endif
+    // Create OpenCL C program
+    else
+    {
+        error = create_single_kernel_helper_create_program(
+            context, outProgram, numKernelLines, kernelProgram, buildOptions
+        );
+        if (error != CL_SUCCESS)
+        {
+            log_error("Create program failed: %d, line: %d\n", error, __LINE__);
+            return error;
+        }
+    }
     // Remove offline-compiler-only build options
     std::string newBuildOptions;
     if (buildOptions != NULL)
         newBuildOptions = buildOptions;
         std::string offlineCompierOptions[] = {
-            "-cl-fp16-enable", "-cl-fp64-enable", "-cl-zero-init-local-mem-vars"
+            "-cl-fp16-enable",
+            "-cl-fp64-enable",
+            "-cl-zero-init-local-mem-vars"
-        for (auto &s : offlineCompierOptions)
+        for(auto& s : offlineCompierOptions)
             std::string::size_type i = newBuildOptions.find(s);
-            if (i != std::string::npos) newBuildOptions.erase(i, s.length());
+            if (i != std::string::npos)
+                newBuildOptions.erase(i, s.length());
     // Build program and create kernel
     return build_program_create_kernel_helper(
-        context, outProgram, outKernel, numKernelLines, kernelProgram,
-        kernelName, newBuildOptions.c_str());
+        context, outProgram, outKernel, numKernelLines, kernelProgram, kernelName, newBuildOptions.c_str()
+    );
+// Creates OpenCL C++ program
+int create_openclcpp_program(cl_context context,
+                             cl_program *outProgram,
+                             unsigned int numKernelLines,
+                             const char **kernelProgram,
+                             const char *buildOptions)
+    // Create program
+    return create_single_kernel_helper_create_program(
+        context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions, true, kSpir_v
+    );
 // Builds OpenCL C/C++ program and creates
-int build_program_create_kernel_helper(
-    cl_context context, cl_program *outProgram, cl_kernel *outKernel,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *kernelName, const char *buildOptions)
+int build_program_create_kernel_helper(cl_context context,
+                                       cl_program *outProgram,
+                                       cl_kernel *outKernel,
+                                       unsigned int numKernelLines,
+                                       const char **kernelProgram,
+                                       const char *kernelName,
+                                       const char *buildOptions)
     int error;
     /* Compile the program */
@@ -826,13 +871,13 @@
         printedSource = 1;
         log_error("Build options: %s\n", buildOptions);
         log_error("Original source is: ------------\n");
-        for (i = 0; i < numKernelLines; i++) log_error("%s", kernelProgram[i]);
+        for (i = 0; i < numKernelLines; i++)
+            log_error("%s", kernelProgram[i]);
     // Verify the build status on all devices
     cl_uint deviceCount = 0;
-    error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES,
-                             sizeof(deviceCount), &deviceCount, NULL);
+    error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES, sizeof(deviceCount), &deviceCount, NULL);
     if (error != CL_SUCCESS)
         print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
@@ -845,14 +890,13 @@
         return -1;
-    cl_device_id *devices =
-        (cl_device_id *)malloc(deviceCount * sizeof(cl_device_id));
-    if (NULL == devices) return -1;
+    cl_device_id *devices = (cl_device_id *)malloc(deviceCount * sizeof(cl_device_id));
+    if (NULL == devices)
+        return -1;
     BufferOwningPtr<cl_device_id> devicesBuf(devices);
     memset(devices, 0, deviceCount * sizeof(cl_device_id));
-    error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES,
-                             sizeof(cl_device_id) * deviceCount, devices, NULL);
+    error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * deviceCount, devices, NULL);
     if (error != CL_SUCCESS)
         print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
@@ -864,8 +908,7 @@
     for (z = 0; z < deviceCount; z++)
         char deviceName[4096] = "";
-        error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName),
-                                deviceName, NULL);
+        error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
         if (error != CL_SUCCESS || deviceName[0] == '\0')
             log_error("Device \"%d\" failed to return a name\n", z);
@@ -873,22 +916,17 @@
         cl_build_status buildStatus;
-        error = clGetProgramBuildInfo(*outProgram, devices[z],
-                                      CL_PROGRAM_BUILD_STATUS,
-                                      sizeof(buildStatus), &buildStatus, NULL);
+        error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
         if (error != CL_SUCCESS)
-            print_error(error,
-                        "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
+            print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
             return error;
-        if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed
-            && deviceCount == 1)
+        if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed && deviceCount == 1)
             buildFailed = true;
-            log_error("clBuildProgram returned an error, but buildStatus is "
-                      "marked as CL_BUILD_SUCCESS.\n");
+            log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n");
         if (buildStatus != CL_BUILD_SUCCESS)
@@ -907,35 +945,25 @@
                 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
             if (buildStatus != CL_BUILD_SUCCESS)
-                log_error(
-                    "Build not successful for device \"%s\", status: %s\n",
-                    deviceName, statusString);
+                log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
             size_t paramSize = 0;
-            error = clGetProgramBuildInfo(*outProgram, devices[z],
-                                          CL_PROGRAM_BUILD_LOG, 0, NULL,
-                                          &paramSize);
+            error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_LOG, 0, NULL, &paramSize);
             if (error != CL_SUCCESS)
-                print_error(
-                    error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
+                print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
                 return error;
             std::string log;
             log.resize(paramSize / sizeof(char));
-            error = clGetProgramBuildInfo(*outProgram, devices[z],
-                                          CL_PROGRAM_BUILD_LOG, paramSize,
-                                          &log[0], NULL);
+            error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL);
             if (error != CL_SUCCESS || log[0] == '\0')
-                log_error("Device %d (%s) failed to return a build log\n", z,
-                          deviceName);
+                log_error("Device %d (%s) failed to return a build log\n", z, deviceName);
                 if (error)
-                    print_error(
-                        error,
-                        "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
+                    print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
                     return error;
@@ -953,8 +981,7 @@
                     log_error("%s", kernelProgram[i]);
                 printedSource = 1;
-            log_error("Build log for device \"%s\" is: ------------\n",
-                      deviceName);
+            log_error("Build log for device \"%s\" is: ------------\n", deviceName);
             log_error("%s\n", log.c_str());
             return -1;
@@ -980,70 +1007,57 @@
     return 0;
-int get_max_allowed_work_group_size(cl_context context, cl_kernel kernel,
-                                    size_t *outMaxSize, size_t *outLimits)
+int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outMaxSize, size_t *outLimits )
     cl_device_id *devices;
     size_t size, maxCommonSize = 0;
     int numDevices, i, j, error;
-    cl_uint numDims;
+  cl_uint numDims;
     size_t outSize;
-    size_t sizeLimit[] = { 1, 1, 1 };
+  size_t sizeLimit[]={1,1,1};
     /* Assume fewer than 16 devices will be returned */
-    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &outSize);
-    test_error(error, "Unable to obtain list of devices size for context");
-    devices = (cl_device_id *)malloc(outSize);
-    BufferOwningPtr<cl_device_id> devicesBuf(devices);
+  error = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &outSize );
+  test_error( error, "Unable to obtain list of devices size for context" );
+  devices = (cl_device_id *)malloc(outSize);
+  BufferOwningPtr<cl_device_id> devicesBuf(devices);
-    error =
-        clGetContextInfo(context, CL_CONTEXT_DEVICES, outSize, devices, NULL);
-    test_error(error, "Unable to obtain list of devices for context");
+  error = clGetContextInfo( context, CL_CONTEXT_DEVICES, outSize, devices, NULL );
+  test_error( error, "Unable to obtain list of devices for context" );
-    numDevices = (int)(outSize / sizeof(cl_device_id));
+    numDevices = (int)( outSize / sizeof( cl_device_id ) );
-    for (i = 0; i < numDevices; i++)
+    for( i = 0; i < numDevices; i++ )
-        error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE,
-                                sizeof(size), &size, NULL);
-        test_error(error, "Unable to obtain max work group size for device");
-        if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
+        error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
+        test_error( error, "Unable to obtain max work group size for device" );
+        if( size < maxCommonSize || maxCommonSize == 0)
+            maxCommonSize = size;
-        error = clGetKernelWorkGroupInfo(kernel, devices[i],
-                                         CL_KERNEL_WORK_GROUP_SIZE,
-                                         sizeof(size), &size, NULL);
-        test_error(
-            error,
-            "Unable to obtain max work group size for device and kernel combo");
-        if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
+        error = clGetKernelWorkGroupInfo( kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
+        test_error( error, "Unable to obtain max work group size for device and kernel combo" );
+        if( size < maxCommonSize  || maxCommonSize == 0)
+            maxCommonSize = size;
-        error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
-                                sizeof(numDims), &numDims, NULL);
-        test_error(
-            error,
-            "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
-        sizeLimit[0] = 1;
-        error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES,
-                                numDims * sizeof(size_t), sizeLimit, NULL);
-        test_error(error,
-                   "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
+    error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( numDims ), &numDims, NULL);
+    test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
+    sizeLimit[0] = 1;
+    error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, numDims*sizeof(size_t), sizeLimit, NULL);
+        test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
         if (outLimits != NULL)
-            if (i == 0)
-            {
-                for (j = 0; j < 3; j++) outLimits[j] = sizeLimit[j];
-            }
-            else
-            {
-                for (j = 0; j < (int)numDims; j++)
-                {
-                    if (sizeLimit[j] < outLimits[j])
-                        outLimits[j] = sizeLimit[j];
-                }
-            }
+      if (i == 0) {
+        for (j=0; j<3; j++)
+          outLimits[j] = sizeLimit[j];
+      } else {
+        for (j=0; j<(int)numDims; j++) {
+          if (sizeLimit[j] < outLimits[j])
+            outLimits[j] = sizeLimit[j];
+      }
+    }
     *outMaxSize = (unsigned int)maxCommonSize;
@@ -1051,230 +1065,204 @@
-extern int get_max_allowed_1d_work_group_size_on_device(cl_device_id device,
-                                                        cl_kernel kernel,
-                                                        size_t *outSize)
+extern int get_max_allowed_1d_work_group_size_on_device( cl_device_id device, cl_kernel kernel, size_t *outSize )
-    cl_uint maxDim;
-    size_t maxWgSize;
-    size_t *maxWgSizePerDim;
-    int error;
+    cl_uint      maxDim;
+    size_t       maxWgSize;
+    size_t       *maxWgSizePerDim;
+    int          error;
-    error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
-                                     sizeof(size_t), &maxWgSize, NULL);
-    test_error(error,
-               "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed");
+    error = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( size_t ), &maxWgSize, NULL );
+    test_error( error, "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed" );
-    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
-                            sizeof(cl_uint), &maxDim, NULL);
-    test_error(error,
-               "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
-    maxWgSizePerDim = (size_t *)malloc(maxDim * sizeof(size_t));
-    if (!maxWgSizePerDim)
+    error = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( cl_uint ), &maxDim, NULL );
+    test_error( error, "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed" );
+    maxWgSizePerDim = (size_t*)malloc( maxDim * sizeof( size_t ) );
+    if( !maxWgSizePerDim )
-        log_error("Unable to allocate maxWgSizePerDim\n");
+        log_error( "Unable to allocate maxWgSizePerDim\n" );
         return -1;
-    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
-                            maxDim * sizeof(size_t), maxWgSizePerDim, NULL);
-    if (error != CL_SUCCESS)
+    error = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_SIZES, maxDim * sizeof( size_t ), maxWgSizePerDim, NULL );
+    if( error != CL_SUCCESS)
-        log_error("clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n");
-        free(maxWgSizePerDim);
+        log_error( "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n" );
+        free( maxWgSizePerDim );
         return error;
     // "maxWgSize" is limited to that of the first dimension.
-    if (maxWgSize > maxWgSizePerDim[0])
+    if( maxWgSize > maxWgSizePerDim[0] )
         maxWgSize = maxWgSizePerDim[0];
-    free(maxWgSizePerDim);
+    free( maxWgSizePerDim );
     *outSize = maxWgSize;
     return 0;
-int get_max_common_work_group_size(cl_context context, cl_kernel kernel,
-                                   size_t globalThreadSize, size_t *outMaxSize)
+int get_max_common_work_group_size( cl_context context, cl_kernel kernel,
+                                   size_t globalThreadSize, size_t *outMaxSize )
-    size_t sizeLimit[3];
-    int error =
-        get_max_allowed_work_group_size(context, kernel, outMaxSize, sizeLimit);
-    if (error != 0) return error;
+  size_t sizeLimit[3];
+    int error = get_max_allowed_work_group_size( context, kernel, outMaxSize, sizeLimit );
+    if( error != 0 )
+        return error;
-    /* Now find the largest factor of globalThreadSize that is <= maxCommonSize
-     */
-    /* Note for speed, we don't need to check the range of maxCommonSize, b/c
-     once it gets to 1, the modulo test will succeed and break the loop anyway
-   */
-    for (;
-         (globalThreadSize % *outMaxSize) != 0 || (*outMaxSize > sizeLimit[0]);
-         (*outMaxSize)--)
+    /* Now find the largest factor of globalThreadSize that is <= maxCommonSize */
+    /* Note for speed, we don't need to check the range of maxCommonSize, b/c once it gets to 1,
+     the modulo test will succeed and break the loop anyway */
+    for( ; ( globalThreadSize % *outMaxSize ) != 0 || (*outMaxSize > sizeLimit[0]); (*outMaxSize)-- )
     return 0;
-int get_max_common_2D_work_group_size(cl_context context, cl_kernel kernel,
-                                      size_t *globalThreadSizes,
-                                      size_t *outMaxSizes)
+int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel,
+                                   size_t *globalThreadSizes, size_t *outMaxSizes )
-    size_t sizeLimit[3];
+  size_t sizeLimit[3];
     size_t maxSize;
-    int error =
-        get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
-    if (error != 0) return error;
+    int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
+    if( error != 0 )
+        return error;
-    /* Now find a set of factors, multiplied together less than maxSize, but
-       each a factor of the global sizes */
+    /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
+       sizes */
     /* Simple case */
-    if (globalThreadSizes[0] * globalThreadSizes[1] <= maxSize)
+    if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] <= maxSize )
-        if (globalThreadSizes[0] <= sizeLimit[0]
-            && globalThreadSizes[1] <= sizeLimit[1])
-        {
-            outMaxSizes[0] = globalThreadSizes[0];
-            outMaxSizes[1] = globalThreadSizes[1];
-            return 0;
-        }
+    if (globalThreadSizes[ 0 ] <= sizeLimit[0] &&  globalThreadSizes[ 1 ] <= sizeLimit[1]) {
+      outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
+      outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
+      return 0;
+    }
-    size_t remainingSize, sizeForThisOne;
+  size_t remainingSize, sizeForThisOne;
+  remainingSize = maxSize;
+  int i, j;
+  for (i=0 ; i<2; i++) {
+    if (globalThreadSizes[i] > remainingSize)
+      sizeForThisOne = remainingSize;
+    else
+      sizeForThisOne = globalThreadSizes[i];
+    for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ;
+    outMaxSizes[i] = sizeForThisOne;
     remainingSize = maxSize;
-    int i, j;
-    for (i = 0; i < 2; i++)
-    {
-        if (globalThreadSizes[i] > remainingSize)
-            sizeForThisOne = remainingSize;
-        else
-            sizeForThisOne = globalThreadSizes[i];
-        for (; (globalThreadSizes[i] % sizeForThisOne) != 0
-             || (sizeForThisOne > sizeLimit[i]);
-             sizeForThisOne--)
-            ;
-        outMaxSizes[i] = sizeForThisOne;
-        remainingSize = maxSize;
-        for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
-    }
+    for (j=0; j<=i; j++)
+      remainingSize /=outMaxSizes[j];
+  }
     return 0;
-int get_max_common_3D_work_group_size(cl_context context, cl_kernel kernel,
-                                      size_t *globalThreadSizes,
-                                      size_t *outMaxSizes)
+int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel,
+                                      size_t *globalThreadSizes, size_t *outMaxSizes )
-    size_t sizeLimit[3];
+  size_t sizeLimit[3];
     size_t maxSize;
-    int error =
-        get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
-    if (error != 0) return error;
-    /* Now find a set of factors, multiplied together less than maxSize, but
-     each a factor of the global sizes */
+    int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
+    if( error != 0 )
+        return error;
+    /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
+     sizes */
     /* Simple case */
-    if (globalThreadSizes[0] * globalThreadSizes[1] * globalThreadSizes[2]
-        <= maxSize)
+    if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] * globalThreadSizes[ 2 ] <= maxSize )
-        if (globalThreadSizes[0] <= sizeLimit[0]
-            && globalThreadSizes[1] <= sizeLimit[1]
-            && globalThreadSizes[2] <= sizeLimit[2])
-        {
-            outMaxSizes[0] = globalThreadSizes[0];
-            outMaxSizes[1] = globalThreadSizes[1];
-            outMaxSizes[2] = globalThreadSizes[2];
-            return 0;
-        }
+    if (globalThreadSizes[ 0 ] <= sizeLimit[0] && globalThreadSizes[ 1 ] <= sizeLimit[1] && globalThreadSizes[ 2 ] <= sizeLimit[2]) {
+      outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
+      outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
+      outMaxSizes[ 2 ] = globalThreadSizes[ 2 ];
+      return 0;
+    }
-    size_t remainingSize, sizeForThisOne;
+  size_t remainingSize, sizeForThisOne;
+  remainingSize = maxSize;
+  int i, j;
+  for (i=0 ; i<3; i++) {
+    if (globalThreadSizes[i] > remainingSize)
+      sizeForThisOne = remainingSize;
+    else
+      sizeForThisOne = globalThreadSizes[i];
+    for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ;
+    outMaxSizes[i] = sizeForThisOne;
     remainingSize = maxSize;
-    int i, j;
-    for (i = 0; i < 3; i++)
-    {
-        if (globalThreadSizes[i] > remainingSize)
-            sizeForThisOne = remainingSize;
-        else
-            sizeForThisOne = globalThreadSizes[i];
-        for (; (globalThreadSizes[i] % sizeForThisOne) != 0
-             || (sizeForThisOne > sizeLimit[i]);
-             sizeForThisOne--)
-            ;
-        outMaxSizes[i] = sizeForThisOne;
-        remainingSize = maxSize;
-        for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
-    }
+    for (j=0; j<=i; j++)
+      remainingSize /=outMaxSizes[j];
+  }
     return 0;
 /* Helper to determine if a device supports an image format */
-int is_image_format_supported(cl_context context, cl_mem_flags flags,
-                              cl_mem_object_type image_type,
-                              const cl_image_format *fmt)
+int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt )
     cl_image_format *list;
     cl_uint count = 0;
-    cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128,
-                                            NULL, &count);
-    if (count == 0) return 0;
+    cl_int err = clGetSupportedImageFormats( context, flags, image_type, 128, NULL, &count );
+    if( count == 0 )
+        return 0;
-    list = (cl_image_format *)malloc(count * sizeof(cl_image_format));
-    if (NULL == list)
+    list = (cl_image_format*) malloc( count * sizeof( cl_image_format ) );
+    if( NULL == list )
-        log_error("Error: unable to allocate %ld byte buffer for image format "
-                  "list at %s:%d (err = %d)\n",
-                  count * sizeof(cl_image_format), __FILE__, __LINE__, err);
+        log_error( "Error: unable to allocate %ld byte buffer for image format list at %s:%d (err = %d)\n", count * sizeof( cl_image_format ), __FILE__, __LINE__,  err );
         return 0;
     BufferOwningPtr<cl_image_format> listBuf(list);
-    cl_int error = clGetSupportedImageFormats(context, flags, image_type, count,
-                                              list, NULL);
-    if (error)
+    cl_int error = clGetSupportedImageFormats( context, flags, image_type, count, list, NULL );
+    if( error )
-        log_error("Error: failed to obtain supported image type list at %s:%d "
-                  "(err = %d)\n",
-                  __FILE__, __LINE__, err);
+        log_error( "Error: failed to obtain supported image type list at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
         return 0;
     // iterate looking for a match.
     cl_uint i;
-    for (i = 0; i < count; i++)
+    for( i = 0; i < count; i++ )
-        if (fmt->image_channel_data_type == list[i].image_channel_data_type
-            && fmt->image_channel_order == list[i].image_channel_order)
+        if( fmt->image_channel_data_type == list[ i ].image_channel_data_type &&
+            fmt->image_channel_order == list[ i ].image_channel_order )
-    return (i < count) ? 1 : 0;
+    return ( i < count ) ? 1 : 0;
-size_t get_pixel_bytes(const cl_image_format *fmt);
-size_t get_pixel_bytes(const cl_image_format *fmt)
+size_t get_pixel_bytes( const cl_image_format *fmt );
+size_t get_pixel_bytes( const cl_image_format *fmt )
     size_t chanCount;
-    switch (fmt->image_channel_order)
+    switch( fmt->image_channel_order )
         case CL_R:
         case CL_A:
         case CL_Rx:
         case CL_INTENSITY:
         case CL_LUMINANCE:
-        case CL_DEPTH: chanCount = 1; break;
+        case CL_DEPTH:
+            chanCount = 1;
+            break;
         case CL_RG:
         case CL_RA:
-        case CL_RGx: chanCount = 2; break;
+        case CL_RGx:
+            chanCount = 2;
+            break;
         case CL_RGB:
         case CL_RGBx:
         case CL_sRGB:
-        case CL_sRGBx: chanCount = 3; break;
+        case CL_sRGBx:
+            chanCount = 3;
+            break;
         case CL_RGBA:
         case CL_ARGB:
         case CL_BGRA:
@@ -1289,73 +1277,74 @@
             chanCount = 4;
-            log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__);
+            log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__ );
-    switch (fmt->image_channel_data_type)
+    switch( fmt->image_channel_data_type )
-        case CL_UNORM_SHORT_565:
-        case CL_UNORM_SHORT_555: return 2;
+          case CL_UNORM_SHORT_565:
+          case CL_UNORM_SHORT_555:
+            return 2;
-        case CL_UNORM_INT_101010: return 4;
+          case CL_UNORM_INT_101010:
+            return 4;
-        case CL_SNORM_INT8:
-        case CL_UNORM_INT8:
-        case CL_SIGNED_INT8:
-        case CL_UNSIGNED_INT8: return chanCount;
+          case CL_SNORM_INT8:
+          case CL_UNORM_INT8:
+          case CL_SIGNED_INT8:
+          case CL_UNSIGNED_INT8:
+            return chanCount;
-        case CL_SNORM_INT16:
-        case CL_UNORM_INT16:
-        case CL_HALF_FLOAT:
-        case CL_SIGNED_INT16:
-        case CL_UNSIGNED_INT16:
+          case CL_SNORM_INT16:
+          case CL_UNORM_INT16:
+          case CL_HALF_FLOAT:
+          case CL_SIGNED_INT16:
+          case CL_UNSIGNED_INT16:
-        case CL_SFIXED14_APPLE:
+          case CL_SFIXED14_APPLE:
             return chanCount * 2;
-        case CL_SIGNED_INT32:
-        case CL_UNSIGNED_INT32:
-        case CL_FLOAT: return chanCount * 4;
+          case CL_SIGNED_INT32:
+          case CL_UNSIGNED_INT32:
+          case CL_FLOAT:
+            return chanCount * 4;
-            log_error("Unknown channel data type at %s:%d!\n", __FILE__,
-                      __LINE__);
+            log_error("Unknown channel data type at %s:%d!\n", __FILE__, __LINE__ );
     return 0;
-test_status verifyImageSupport(cl_device_id device)
+test_status verifyImageSupport( cl_device_id device )
-    int result = checkForImageSupport(device);
-    if (result == 0)
+    int result = checkForImageSupport( device );
+    if( result == 0 )
         return TEST_PASS;
+    if( result == CL_IMAGE_FORMAT_NOT_SUPPORTED )
-        log_error("SKIPPED: Device does not supported images as required by "
-                  "this test!\n");
+        log_error( "SKIPPED: Device does not supported images as required by this test!\n" );
         return TEST_SKIP;
     return TEST_FAIL;
-int checkForImageSupport(cl_device_id device)
+int checkForImageSupport( cl_device_id device )
     cl_uint i;
     int error;
     /* Check the device props to see if images are supported at all first */
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
-    test_error(error, "Unable to query device for image support");
-    if (i == 0)
+    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL );
+    test_error( error, "Unable to query device for image support" );
+    if( i == 0 )
@@ -1364,78 +1353,37 @@
     return 0;
-int checkFor3DImageSupport(cl_device_id device)
+int checkFor3DImageSupport( cl_device_id device )
     cl_uint i;
     int error;
     /* Check the device props to see if images are supported at all first */
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
-    test_error(error, "Unable to query device for image support");
-    if (i == 0)
+    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL );
+    test_error( error, "Unable to query device for image support" );
+    if( i == 0 )
     char profile[128];
-    error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile,
-                            NULL);
-    test_error(error, "Unable to query device for CL_DEVICE_PROFILE");
-    if (0 == strcmp(profile, "EMBEDDED_PROFILE"))
+    error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile ), profile, NULL );
+    test_error( error, "Unable to query device for CL_DEVICE_PROFILE" );
+    if( 0 == strcmp( profile, "EMBEDDED_PROFILE" ) )
         size_t width = -1L;
         size_t height = -1L;
         size_t depth = -1L;
-        error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH,
-                                sizeof(width), &width, NULL);
-        test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH");
-        error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
-                                sizeof(height), &height, NULL);
-        test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT");
-        error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH,
-                                sizeof(depth), &depth, NULL);
-        test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH");
+        error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(width), &width, NULL );
+        test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH" );
+        error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(height), &height, NULL );
+        test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT" );
+        error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(depth), &depth, NULL );
+        test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH" );
-        if (0 == (height | width | depth)) return CL_IMAGE_FORMAT_NOT_SUPPORTED;
-    }
-    /* So our support is good */
-    return 0;
-int checkForReadWriteImageSupport(cl_device_id device)
-    if (checkForImageSupport(device))
-    {
-    }
-    auto device_cl_version = get_device_cl_version(device);
-    if (device_cl_version >= Version(3, 0))
-    {
-        // In OpenCL 3.0, Read-Write images are optional.
-        // Check if they are supported.
-        cl_uint are_rw_images_supported{};
-        test_error(
-            clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
-                            sizeof(are_rw_images_supported),
-                            &are_rw_images_supported, nullptr),
-            "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n");
-        if (0 == are_rw_images_supported)
-        {
-            log_info("READ_WRITE_IMAGE tests skipped, not supported.\n");
+        if( 0 == (height | width | depth ))
             return CL_IMAGE_FORMAT_NOT_SUPPORTED;
-        }
-    // READ_WRITE images are not supported on 1.X devices.
-    else if (device_cl_version < Version(2, 0))
-    {
-        log_info("READ_WRITE_IMAGE tests skipped, Opencl 2.0+ is requried.");
-    }
-    // Support for read-write image arguments is required
-    // for an 2.X device if the device supports images.
     /* So our support is good */
     return 0;
@@ -1445,43 +1393,51 @@
     static cl_uint align_size = 0;
-    if (0 == align_size)
+    if( 0 == align_size )
-        cl_device_id *devices;
+        cl_device_id * devices;
         size_t devices_size = 0;
         cl_uint result = 0;
         cl_int error;
         int i;
-        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
-                                 &devices_size);
+        error = clGetContextInfo (context,
+                                  CL_CONTEXT_DEVICES,
+                                  0,
+                                  NULL,
+                                  &devices_size);
         test_error_ret(error, "clGetContextInfo failed", 0);
-        devices = (cl_device_id *)malloc(devices_size);
-        if (devices == NULL)
-        {
-            print_error(error, "malloc failed");
+        devices = (cl_device_id*)malloc(devices_size);
+        if (devices == NULL) {
+            print_error( error, "malloc failed" );
             return 0;
-        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size,
-                                 (void *)devices, NULL);
+        error = clGetContextInfo (context,
+                                  CL_CONTEXT_DEVICES,
+                                  devices_size,
+                                  (void*)devices,
+                                  NULL);
         test_error_ret(error, "clGetContextInfo failed", 0);
-        for (i = 0; i < (int)(devices_size / sizeof(cl_device_id)); i++)
+        for (i = 0; i < (int)(devices_size/sizeof(cl_device_id)); i++)
             cl_uint alignment = 0;
-            error = clGetDeviceInfo(devices[i], CL_DEVICE_MEM_BASE_ADDR_ALIGN,
-                                    sizeof(cl_uint), (void *)&alignment, NULL);
+            error = clGetDeviceInfo (devices[i],
+                                     CL_DEVICE_MEM_BASE_ADDR_ALIGN,
+                                     sizeof(cl_uint),
+                                     (void*)&alignment,
+                                     NULL);
             if (error == CL_SUCCESS)
-                alignment >>= 3; // convert bits to bytes
+                alignment >>= 3;    // convert bits to bytes
                 result = (alignment > result) ? alignment : result;
-                print_error(error, "clGetDeviceInfo failed");
+                print_error( error, "clGetDeviceInfo failed" );
         align_size = result;
@@ -1491,291 +1447,59 @@
     return align_size;
-cl_device_fp_config get_default_rounding_mode(cl_device_id device)
+cl_device_fp_config get_default_rounding_mode( cl_device_id device )
     char profileStr[128] = "";
     cl_device_fp_config single = 0;
-    int error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
-                                sizeof(single), &single, NULL);
-    if (error)
-        test_error_ret(error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG",
-                       0);
+    int error = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL );
+    if( error )
+        test_error_ret( error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0 );
-    if (single & CL_FP_ROUND_TO_NEAREST) return CL_FP_ROUND_TO_NEAREST;
+    if( single & CL_FP_ROUND_TO_NEAREST )
+        return CL_FP_ROUND_TO_NEAREST;
-    if (0 == (single & CL_FP_ROUND_TO_ZERO))
-        test_error_ret(-1,
-                       "FAILURE: device must support either "
-                       0);
+    if( 0 == (single & CL_FP_ROUND_TO_ZERO) )
+        test_error_ret( -1, "FAILURE: device must support either CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST", 0 );
     // Make sure we are an embedded device before allowing a pass
-    if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profileStr),
-                                 &profileStr, NULL)))
-        test_error_ret(error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0);
+    if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL ) ))
+        test_error_ret( error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0 );
-    if (strcmp(profileStr, "EMBEDDED_PROFILE"))
-        test_error_ret(error,
-                       "FAILURE: non-EMBEDDED_PROFILE devices must support "
-                       "CL_FP_ROUND_TO_NEAREST",
-                       0);
+    if( strcmp( profileStr, "EMBEDDED_PROFILE" ) )
+        test_error_ret( error, "FAILURE: non-EMBEDDED_PROFILE devices must support CL_FP_ROUND_TO_NEAREST", 0 );
     return CL_FP_ROUND_TO_ZERO;
-int checkDeviceForQueueSupport(cl_device_id device,
-                               cl_command_queue_properties prop)
+int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop )
     cl_command_queue_properties realProps;
-    cl_int error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
-                                   sizeof(realProps), &realProps, NULL);
-    test_error_ret(error, "FAILURE: Unable to get device queue properties", 0);
+    cl_int error = clGetDeviceInfo( device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof( realProps ), &realProps, NULL );
+    test_error_ret( error, "FAILURE: Unable to get device queue properties", 0 );
-    return (realProps & prop) ? 1 : 0;
+    return ( realProps & prop ) ? 1 : 0;
-int printDeviceHeader(cl_device_id device)
+int printDeviceHeader( cl_device_id device )
-    char deviceName[512], deviceVendor[512], deviceVersion[512],
-        cLangVersion[512];
+    char deviceName[ 512 ], deviceVendor[ 512 ], deviceVersion[ 512 ], cLangVersion[ 512 ];
     int error;
-    error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName),
-                            deviceName, NULL);
-    test_error(error, "Unable to get CL_DEVICE_NAME for device");
+    error = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( deviceName ), deviceName, NULL );
+    test_error( error, "Unable to get CL_DEVICE_NAME for device" );
-    error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(deviceVendor),
-                            deviceVendor, NULL);
-    test_error(error, "Unable to get CL_DEVICE_VENDOR for device");
+    error = clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( deviceVendor ), deviceVendor, NULL );
+    test_error( error, "Unable to get CL_DEVICE_VENDOR for device" );
-    error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(deviceVersion),
-                            deviceVersion, NULL);
-    test_error(error, "Unable to get CL_DEVICE_VERSION for device");
+    error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof( deviceVersion ), deviceVersion, NULL );
+    test_error( error, "Unable to get CL_DEVICE_VERSION for device" );
-    error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
-                            sizeof(cLangVersion), cLangVersion, NULL);
-    test_error(error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device");
+    error = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof( cLangVersion ), cLangVersion, NULL );
+    test_error( error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device" );
-    log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute "
-             "Device Version = %s%s%s\n",
-             deviceName, deviceVendor, deviceVersion,
-             (error == CL_SUCCESS) ? ", CL C Version = " : "",
-             (error == CL_SUCCESS) ? cLangVersion : "");
-    auto version = get_device_cl_version(device);
-    if (version >= Version(3, 0))
-    {
-        auto ctsVersion = get_device_info_string(
-        log_info("Device latest conformance version passed: %s\n",
-                 ctsVersion.c_str());
-    }
+    log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute Device Version = %s%s%s\n",
+             deviceName, deviceVendor, deviceVersion, ( error == CL_SUCCESS ) ? ", CL C Version = " : "",
+             ( error == CL_SUCCESS ) ? cLangVersion : "" );
     return CL_SUCCESS;
-Version get_device_cl_c_version(cl_device_id device)
-    auto device_cl_version = get_device_cl_version(device);
-    // The second special case is OpenCL-1.0 where CL_DEVICE_OPENCL_C_VERSION
-    // did not exist, but since this is just the first version we can
-    // return 1.0.
-    if (device_cl_version == Version{ 1, 0 })
-    {
-        return Version{ 1, 0 };
-    }
-    // Otherwise we know we have a 1.1 <= device_version <= 2.0 where all CL C
-    // versions are backwards compatible, hence querying with the
-    // CL_DEVICE_OPENCL_C_VERSION query must return the most recent supported
-    // OpenCL C version.
-    size_t opencl_c_version_size_in_bytes{};
-    auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, nullptr,
-                                 &opencl_c_version_size_in_bytes);
-    test_error_ret(error,
-                   "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
-                   (Version{ -1, 0 }));
-    std::string opencl_c_version(opencl_c_version_size_in_bytes, '\0');
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
-                        opencl_c_version.size(), &opencl_c_version[0], nullptr);
-    test_error_ret(error,
-                   "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
-                   (Version{ -1, 0 }));
-    // Scrape out the major, minor pair from the string.
-    auto major = opencl_c_version[opencl_c_version.find('.') - 1];
-    auto minor = opencl_c_version[opencl_c_version.find('.') + 1];
-    return Version{ major - '0', minor - '0' };
-Version get_device_latest_cl_c_version(cl_device_id device)
-    auto device_cl_version = get_device_cl_version(device);
-    // If the device version >= 3.0 it must support the
-    // CL_DEVICE_OPENCL_C_ALL_VERSIONS query from which we can extract the most
-    // recent CL C version supported by the device.
-    if (device_cl_version >= Version{ 3, 0 })
-    {
-        size_t opencl_c_all_versions_size_in_bytes{};
-        auto error =
-            clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
-                            &opencl_c_all_versions_size_in_bytes);
-        test_error_ret(
-            error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
-            (Version{ -1, 0 }));
-        std::vector<cl_name_version> name_versions(
-            opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
-        error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
-                                opencl_c_all_versions_size_in_bytes,
-                      , nullptr);
-        test_error_ret(
-            error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
-            (Version{ -1, 0 }));
-        Version max_supported_cl_c_version{};
-        for (const auto &name_version : name_versions)
-        {
-            Version current_version{ CL_VERSION_MAJOR(name_version.version),
-                                     CL_VERSION_MINOR(name_version.version) };
-            max_supported_cl_c_version =
-                (current_version > max_supported_cl_c_version)
-                ? current_version
-                : max_supported_cl_c_version;
-        }
-        return max_supported_cl_c_version;
-    }
-    return get_device_cl_c_version(device);
-Version get_max_OpenCL_C_for_context(cl_context context)
-    // Get all the devices in the context and find the maximum
-    // universally supported OpenCL C version.
-    size_t devices_size_in_bytes{};
-    auto error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr,
-                                  &devices_size_in_bytes);
-    test_error_ret(error, "clGetDeviceInfo failed for CL_CONTEXT_DEVICES",
-                   (Version{ -1, 0 }));
-    std::vector<cl_device_id> devices(devices_size_in_bytes
-                                      / sizeof(cl_device_id));
-    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size_in_bytes,
-                   , nullptr);
-    auto current_version = get_device_latest_cl_c_version(devices[0]);
-    std::for_each(std::next(devices.begin()), devices.end(),
-                  [&current_version](cl_device_id device) {
-                      auto device_version =
-                          get_device_latest_cl_c_version(device);
-                      // OpenCL 3.0 is not backwards compatible with 2.0.
-                      // If we have 3.0 and 2.0 in the same driver we
-                      // use 1.2.
-                      if (((device_version >= Version(2, 0)
-                            && device_version < Version(3, 0))
-                           && current_version >= Version(3, 0))
-                          || (device_version >= Version(3, 0)
-                              && (current_version >= Version(2, 0)
-                                  && current_version < Version(3, 0))))
-                      {
-                          current_version = Version(1, 2);
-                      }
-                      else
-                      {
-                          current_version =
-                              (std::min)(device_version, current_version);
-                      }
-                  });
-    return current_version;
-bool device_supports_cl_c_version(cl_device_id device, Version version)
-    auto device_cl_version = get_device_cl_version(device);
-    // In general, a device does not support an OpenCL C version if it is <=
-    // CL_DEVICE_OPENCL_C_VERSION AND it does not appear in the
-    // If the device version >= 3.0 it must support the
-    // CL_DEVICE_OPENCL_C_ALL_VERSIONS query, and the version of OpenCL C being
-    // used must appear in the query result if it's <=
-    if (device_cl_version >= Version{ 3, 0 })
-    {
-        size_t opencl_c_all_versions_size_in_bytes{};
-        auto error =
-            clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
-                            &opencl_c_all_versions_size_in_bytes);
-        test_error_ret(
-            error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
-            (false));
-        std::vector<cl_name_version> name_versions(
-            opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
-        error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
-                                opencl_c_all_versions_size_in_bytes,
-                      , nullptr);
-        test_error_ret(
-            error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
-            (false));
-        for (const auto &name_version : name_versions)
-        {
-            Version current_version{ CL_VERSION_MAJOR(name_version.version),
-                                     CL_VERSION_MINOR(name_version.version) };
-            if (current_version == version)
-            {
-                return true;
-            }
-        }
-    }
-    return version <= get_device_cl_c_version(device);
-bool poll_until(unsigned timeout_ms, unsigned interval_ms,
-                std::function<bool()> fn)
-    unsigned time_spent_ms = 0;
-    bool ret = false;
-    while (time_spent_ms < timeout_ms)
-    {
-        ret = fn();
-        if (ret)
-        {
-            break;
-        }
-        usleep(interval_ms * 1000);
-        time_spent_ms += interval_ms;
-    }
-    return ret;
-bool device_supports_double(cl_device_id device)
-    if (is_extension_available(device, "cl_khr_fp64"))
-    {
-        return true;
-    }
-    else
-    {
-        cl_device_fp_config double_fp_config;
-        cl_int err = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG,
-                                     sizeof(double_fp_config),
-                                     &double_fp_config, nullptr);
-        test_error(err,
-                   "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed");
-        return double_fp_config != 0;
-    }
-bool device_supports_half(cl_device_id device)
-    return is_extension_available(device, "cl_khr_fp16");
diff --git a/test_common/harness/kernelHelpers.h b/test_common/harness/kernelHelpers.h
index 4d8f2a8..e97ec1e 100644
--- a/test_common/harness/kernelHelpers.h
+++ b/test_common/harness/kernelHelpers.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -25,26 +25,23 @@
 #include <stdio.h>
 #include <stdlib.h>
-#if defined(__MINGW32__)
+#if defined (__MINGW32__)
 #include <malloc.h>
 #include <string.h>
 #ifdef __APPLE__
-#include <OpenCL/opencl.h>
+    #include <OpenCL/opencl.h>
-#include <CL/opencl.h>
+    #include <CL/opencl.h>
 #include "deviceInfo.h"
 #include "harness/alloc.h"
-#include <functional>
- *  The below code is intended to be used at the top of kernels that appear
- * inline in files to set line and file info for the kernel:
+ *  The below code is intended to be used at the top of kernels that appear inline in files to set line and file info for the kernel:
  *  const char *source = {
@@ -54,164 +51,126 @@
  *      "}\n"
  *  };
-#define SET_OPENCL_LINE_INFO(_line, _file)                                     \
-    "#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n"
+#define INIT_OPENCL_DEBUG_INFO                      SET_OPENCL_LINE_INFO( __LINE__, __FILE__ )
+#define SET_OPENCL_LINE_INFO(_line, _file)          "#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n"
+    #define STRINGIFY_VALUE(_x)                     STRINGIFY(_x)
 #ifndef STRINGIFY
-#define STRINGIFY(_x) #_x
+    #define STRINGIFY(_x)                           #_x
 const int MAX_LEN_FOR_KERNEL_LIST = 20;
-/* Helper that creates a single program and kernel from a single-kernel program
- * source */
-extern int
-create_single_kernel_helper(cl_context context, cl_program *outProgram,
-                            cl_kernel *outKernel, unsigned int numKernelLines,
-                            const char **kernelProgram, const char *kernelName,
-                            const char *buildOptions = NULL);
+/* Helper that creates a single program and kernel from a single-kernel program source */
+extern int create_single_kernel_helper(cl_context context, 
+                                       cl_program *outProgram,
+                                       cl_kernel *outKernel,
+                                       unsigned int numKernelLines,
+                                       const char **kernelProgram,
+                                       const char *kernelName,
+                                       const char *buildOptions = NULL,
+                                       const bool openclCXX = false);
-extern int create_single_kernel_helper_with_build_options(
-    cl_context context, cl_program *outProgram, cl_kernel *outKernel,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *kernelName, const char *buildOptions);
+extern int create_single_kernel_helper_with_build_options(cl_context context, 
+                                                          cl_program *outProgram,
+                                                          cl_kernel *outKernel,
+                                                          unsigned int numKernelLines,
+                                                          const char **kernelProgram,
+                                                          const char *kernelName,
+                                                          const char *buildOptions,
+                                                          const bool openclCXX = false);
-extern int create_single_kernel_helper_create_program(
-    cl_context context, cl_program *outProgram, unsigned int numKernelLines,
-    const char **kernelProgram, const char *buildOptions = NULL);
+extern int create_single_kernel_helper_create_program(cl_context context, 
+                                                      cl_program *outProgram,
+                                                      unsigned int numKernelLines,
+                                                      const char **kernelProgram,
+                                                      const char *buildOptions = NULL,
+                                                      const bool openclCXX = false);
+extern int create_single_kernel_helper_create_program_for_device(cl_context context,
+                                                                 cl_device_id device,
+                                                                 cl_program *outProgram,
+                                                                 unsigned int numKernelLines,
+                                                                 const char **kernelProgram,
+                                                                 const char *buildOptions = NULL,
+                                                                 const bool openclCXX = false);
-extern int create_single_kernel_helper_create_program_for_device(
-    cl_context context, cl_device_id device, cl_program *outProgram,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *buildOptions = NULL);
-/* Creates OpenCL C++ program. This one must be used for creating OpenCL C++
- * program. */
-extern int create_openclcpp_program(cl_context context, cl_program *outProgram,
+/* Creates OpenCL C++ program. This one must be used for creating OpenCL C++ program. */
+extern int create_openclcpp_program(cl_context context, 
+                                    cl_program *outProgram,
                                     unsigned int numKernelLines,
                                     const char **kernelProgram,
                                     const char *buildOptions = NULL);
 /* Builds program (outProgram) and creates one kernel */
-int build_program_create_kernel_helper(
-    cl_context context, cl_program *outProgram, cl_kernel *outKernel,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *kernelName, const char *buildOptions = NULL);
+int build_program_create_kernel_helper(cl_context context,
+                                       cl_program *outProgram,
+                                       cl_kernel *outKernel,
+                                       unsigned int numKernelLines,
+                                       const char **kernelProgram,
+                                       const char *kernelName,
+                                       const char *buildOptions = NULL);
-/* Helper to obtain the biggest fit work group size for all the devices in a
- * given group and for the given global thread size */
-extern int get_max_common_work_group_size(cl_context context, cl_kernel kernel,
-                                          size_t globalThreadSize,
-                                          size_t *outSize);
+/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
+extern int get_max_common_work_group_size( cl_context context, cl_kernel kernel, size_t globalThreadSize, size_t *outSize );
-/* Helper to obtain the biggest fit work group size for all the devices in a
- * given group and for the given global thread size */
-extern int get_max_common_2D_work_group_size(cl_context context,
-                                             cl_kernel kernel,
-                                             size_t *globalThreadSize,
-                                             size_t *outSizes);
+/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
+extern int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
-/* Helper to obtain the biggest fit work group size for all the devices in a
- * given group and for the given global thread size */
-extern int get_max_common_3D_work_group_size(cl_context context,
-                                             cl_kernel kernel,
-                                             size_t *globalThreadSize,
-                                             size_t *outSizes);
+/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
+extern int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
-/* Helper to obtain the biggest allowed work group size for all the devices in a
- * given group */
-extern int get_max_allowed_work_group_size(cl_context context, cl_kernel kernel,
-                                           size_t *outSize, size_t *outLimits);
+/* Helper to obtain the biggest allowed work group size for all the devices in a given group */
+extern int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outSize, size_t *outLimits );
 /* Helper to obtain the biggest allowed 1D work group size on a given device */
-extern int get_max_allowed_1d_work_group_size_on_device(cl_device_id device,
-                                                        cl_kernel kernel,
-                                                        size_t *outSize);
+extern int get_max_allowed_1d_work_group_size_on_device( cl_device_id device, cl_kernel kernel, size_t *outSize );
 /* Helper to determine if a device supports an image format */
-extern int is_image_format_supported(cl_context context, cl_mem_flags flags,
-                                     cl_mem_object_type image_type,
-                                     const cl_image_format *fmt);
+extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt );
 /* Helper to get pixel size for a pixel format */
-size_t get_pixel_bytes(const cl_image_format *fmt);
+size_t get_pixel_bytes( const cl_image_format *fmt );
 /* Verify the given device supports images. */
-extern test_status verifyImageSupport(cl_device_id device);
+extern test_status verifyImageSupport( cl_device_id device );
-/* Checks that the given device supports images. Same as verify, but doesn't
- * print an error */
-extern int checkForImageSupport(cl_device_id device);
-extern int checkFor3DImageSupport(cl_device_id device);
-extern int checkForReadWriteImageSupport(cl_device_id device);
+/* Checks that the given device supports images. Same as verify, but doesn't print an error */
+extern int checkForImageSupport( cl_device_id device );
+extern int checkFor3DImageSupport( cl_device_id device );
-/* Checks that a given queue property is supported on the specified device.
- * Returns 1 if supported, 0 if not or an error. */
-extern int checkDeviceForQueueSupport(cl_device_id device,
-                                      cl_command_queue_properties prop);
+/* Checks that a given queue property is supported on the specified device. Returns 1 if supported, 0 if not or an error. */
+extern int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop );
-/* Helper to obtain the min alignment for a given context, i.e the max of all
- * min alignments for devices attached to the context*/
+/* Helper to obtain the min alignment for a given context, i.e the max of all min alignments for devices attached to the context*/
 size_t get_min_alignment(cl_context context);
-/* Helper to obtain the default rounding mode for single precision computation.
- * (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
-cl_device_fp_config get_default_rounding_mode(cl_device_id device);
+/* Helper to obtain the default rounding mode for single precision computation. (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
+cl_device_fp_config get_default_rounding_mode( cl_device_id device );
-#define PASSIVE_REQUIRE_IMAGE_SUPPORT(device)                                  \
-    if (checkForImageSupport(device))                                          \
-    {                                                                          \
-        log_info(                                                              \
-            "\n\tNote: device does not support images. Skipping test...\n");   \
-        return TEST_SKIPPED_ITSELF;                                            \
+#define PASSIVE_REQUIRE_IMAGE_SUPPORT( device )    \
+    if( checkForImageSupport( device ) )    \
+    {    \
+        log_info( "\n\tNote: device does not support images. Skipping test...\n" );    \
+        return 0;    \
-#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device)                               \
-    if (checkFor3DImageSupport(device))                                        \
-    {                                                                          \
-        log_info("\n\tNote: device does not support 3D images. Skipping "      \
-                 "test...\n");                                                 \
-        return TEST_SKIPPED_ITSELF;                                            \
+#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )    \
+    if( checkFor3DImageSupport( device ) )    \
+    {    \
+        log_info( "\n\tNote: device does not support 3D images. Skipping test...\n" );    \
+        return 0;    \
-#define PASSIVE_REQUIRE_FP16_SUPPORT(device)                                   \
-    if (!device_supports_half(device))                                         \
-    {                                                                          \
-        log_info(                                                              \
-            "\n\tNote: device does not support fp16. Skipping test...\n");     \
-        return TEST_SKIPPED_ITSELF;                                            \
+#define PASSIVE_REQUIRE_FP16_SUPPORT(device)                            \
+    if (!is_extension_available(device, "cl_khr_fp16"))                 \
+    {                                                                   \
+        log_info("\n\tNote: device does not support fp16. Skipping test...\n"); \
+        return 0;                                                       \
-/* Prints out the standard device header for all tests given the device to print
- * for */
-extern int printDeviceHeader(cl_device_id device);
-// Execute the CL_DEVICE_OPENCL_C_VERSION query and return the OpenCL C version
-// is supported by the device.
-Version get_device_cl_c_version(cl_device_id device);
-// Gets the latest (potentially non-backward compatible) OpenCL C version
-// supported by the device.
-Version get_device_latest_cl_c_version(cl_device_id device);
-// Gets the maximum universally supported OpenCL C version in a context, i.e.
-// the OpenCL C version supported by all devices in a context.
-Version get_max_OpenCL_C_for_context(cl_context context);
-// Checks whether a particular OpenCL C version is supported by the device.
-bool device_supports_cl_c_version(cl_device_id device, Version version);
-// Poll fn every interval_ms until timeout_ms or it returns true
-bool poll_until(unsigned timeout_ms, unsigned interval_ms,
-                std::function<bool()> fn);
-// Checks whether the device supports double data types
-bool device_supports_double(cl_device_id device);
-// Checks whether the device supports half data types
-bool device_supports_half(cl_device_id device);
+/* Prints out the standard device header for all tests given the device to print for */
+extern int printDeviceHeader( cl_device_id device );
 #endif // _kernelHelpers_h
diff --git a/test_common/harness/mingw_compat.c b/test_common/harness/mingw_compat.c
index 5b38472..54c4463 100644
--- a/test_common/harness/mingw_compat.c
+++ b/test_common/harness/mingw_compat.c
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -19,44 +19,41 @@
 #include <stdio.h>
 #include <string.h>
-// This function is unavailable on various mingw compilers,
-// especially 64 bit so implementing it here
-const char *basename_dot = ".";
-char *basename(char *path)
+//This function is unavailable on various mingw compilers,
+//especially 64 bit so implementing it here
+const char *basename_dot=".";
+basename(char *path)
     char *p = path, *b = NULL;
     int len = strlen(path);
-    if (path == NULL)
-    {
-        return (char *)basename_dot;
+    if (path == NULL) {
+        return (char*)basename_dot;
     // Not absolute path on windows
-    if (path[1] != ':')
-    {
+    if (path[1] != ':') {
         return path;
     // Trim trailing path seperators
-    if (path[len - 1] == '\\' || path[len - 1] == '/')
-    {
+    if (path[len - 1]  == '\\' ||
+        path[len - 1]  == '/' ) {
         path[len] = '\0';
-    while (len)
-    {
-        while ((*p != '\\' || *p != '/') && len)
-        {
+    while (len) {
+        while((*p != '\\' || *p != '/')  && len) {
         b = p;
-    }
+     }
-    return b;
+     return b;
\ No newline at end of file
diff --git a/test_common/harness/mingw_compat.h b/test_common/harness/mingw_compat.h
index a509c75..ab28f39 100644
--- a/test_common/harness/mingw_compat.h
+++ b/test_common/harness/mingw_compat.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -21,7 +21,7 @@
 #include <malloc.h>
 #if defined(__MINGW64__)
-// mingw-w64 doesnot have __mingw_aligned_malloc, instead it has _aligned_malloc
+//mingw-w64 doesnot have __mingw_aligned_malloc, instead it has _aligned_malloc
 #define __mingw_aligned_malloc _aligned_malloc
 #define __mingw_aligned_free _aligned_free
 #include <stddef.h>
diff --git a/test_common/harness/msvc9.c b/test_common/harness/msvc9.c
index 29b45d6..1c0cf2b 100644
--- a/test_common/harness/msvc9.c
+++ b/test_common/harness/msvc9.c
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -15,7 +15,7 @@
 #include "compat.h"
-#if defined(_MSC_VER)
+#if defined ( _MSC_VER )
 #include <limits.h>
 #include <stdlib.h>
@@ -24,7 +24,7 @@
 #include <windows.h>
-#if _MSC_VER < 1900 && !defined(__INTEL_COMPILER)
+#if _MSC_VER < 1900 && ! defined( __INTEL_COMPILER )
@@ -32,12 +32,9 @@
-float copysignf(float x, float y)
+float copysignf( float x, float y )
-    union {
-        cl_uint u;
-        float f;
-    } ux, uy;
+    union{ cl_uint u; float f; }ux, uy;
     ux.f = x;
     uy.f = y;
@@ -47,12 +44,9 @@
     return ux.f;
-double copysign(double x, double y)
+double copysign( double x, double y )
-    union {
-        cl_ulong u;
-        double f;
-    } ux, uy;
+    union{ cl_ulong u; double f; }ux, uy;
     ux.f = x;
     uy.f = y;
@@ -62,16 +56,13 @@
     return ux.f;
-long double copysignl(long double x, long double y)
+long double copysignl( long double x, long double y )
-    union {
+    union
+    {
         long double f;
-        struct
-        {
-            cl_ulong m;
-            cl_ushort sexp;
-        } u;
-    } ux, uy;
+        struct{ cl_ulong m; cl_ushort sexp; }u;
+    }ux, uy;
     ux.f = x;
     uy.f = y;
@@ -85,12 +76,12 @@
     float absx = fabsf(x);
-    if (absx < 8388608.0f /* 0x1.0p23f */)
+    if( absx < 8388608.0f /* 0x1.0p23f */ )
-        float magic = copysignf(8388608.0f /* 0x1.0p23f */, x);
+        float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
         float rounded = x + magic;
         rounded -= magic;
-        x = copysignf(rounded, x);
+        x = copysignf( rounded, x );
     return x;
@@ -100,12 +91,12 @@
     double absx = fabs(x);
-    if (absx < 4503599627370496.0 /* 0x1.0p52f */)
+    if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
-        double magic = copysign(4503599627370496.0 /* 0x1.0p52 */, x);
+        double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
         double rounded = x + magic;
         rounded -= magic;
-        x = copysign(rounded, x);
+        x = copysign( rounded, x );
     return x;
@@ -115,13 +106,12 @@
     double absx = fabs(x);
-    if (absx < 9223372036854775808.0L /* 0x1.0p64f */)
+    if( absx < 9223372036854775808.0L /* 0x1.0p64f */ )
-        long double magic =
-            copysignl(9223372036854775808.0L /* 0x1.0p63L */, x);
+        long double magic = copysignl( 9223372036854775808.0L /* 0x1.0p63L */, x );
         long double rounded = x + magic;
         rounded -= magic;
-        x = copysignl(rounded, x);
+        x = copysignl( rounded, x );
     return x;
@@ -135,31 +125,30 @@
 #ifndef FP_ILOGB0
-#define FP_ILOGB0 INT_MIN
+    #define FP_ILOGB0   INT_MIN
 #ifndef FP_ILOGBNAN
+    #define FP_ILOGBNAN INT_MIN
-int ilogb(double x)
+int ilogb (double x)
-    union {
-        double f;
-        cl_ulong u;
-    } u;
+    union{ double f; cl_ulong u;} u;
     u.f = x;
     cl_ulong absx = u.u & CL_LONG_MAX;
-    if (absx - 0x0001000000000000ULL
-        >= 0x7ff0000000000000ULL - 0x0001000000000000ULL)
+    if( absx - 0x0001000000000000ULL >= 0x7ff0000000000000ULL - 0x0001000000000000ULL)
-        switch (absx)
+        switch( absx )
-            case 0: return FP_ILOGB0;
-            case 0x7ff0000000000000ULL: return INT_MAX;
+            case 0:
+                return FP_ILOGB0;
+            case 0x7ff0000000000000ULL:
+                return INT_MAX;
-                if (absx > 0x7ff0000000000000ULL) return FP_ILOGBNAN;
+                if( absx > 0x7ff0000000000000ULL )
+                    return FP_ILOGBNAN;
                 // subnormal
                 u.u = absx | 0x3ff0000000000000ULL;
@@ -172,23 +161,23 @@
-int ilogbf(float x)
+int ilogbf (float x)
-    union {
-        float f;
-        cl_uint u;
-    } u;
+    union{ float f; cl_uint u;} u;
     u.f = x;
     cl_uint absx = u.u & 0x7fffffff;
-    if (absx - 0x00800000U >= 0x7f800000U - 0x00800000U)
+    if( absx - 0x00800000U >= 0x7f800000U - 0x00800000U)
-        switch (absx)
+        switch( absx )
-            case 0: return FP_ILOGB0;
-            case 0x7f800000U: return INT_MAX;
+            case 0:
+                return FP_ILOGB0;
+            case 0x7f800000U:
+                return INT_MAX;
-                if (absx > 0x7f800000) return FP_ILOGBNAN;
+                if( absx > 0x7f800000 )
+                    return FP_ILOGBNAN;
                 // subnormal
                 u.u = absx | 0x3f800000U;
@@ -200,33 +189,32 @@
     return (absx >> 23) - 127;
-int ilogbl(long double x)
+int ilogbl (long double x)
-    union {
+    union
+    {
         long double f;
-        struct
-        {
-            cl_ulong m;
-            cl_ushort sexp;
-        } u;
+        struct{ cl_ulong m; cl_ushort sexp; }u;
     } u;
     u.f = x;
     int exp = u.u.sexp & 0x7fff;
-    if (0 == exp)
+    if( 0 == exp )
-        if (0 == u.u.m) return FP_ILOGB0;
+        if( 0 == u.u.m )
+            return FP_ILOGB0;
-        // subnormal
+        //subnormal
         u.u.sexp = 0x3fff;
         u.f -= 1.0f;
         exp = u.u.sexp & 0x7fff;
         return exp - (0x3fff + 0x3ffe);
-    else if (0x7fff == exp)
+    else if( 0x7fff == exp )
-        if (u.u.m & CL_LONG_MAX) return FP_ILOGBNAN;
+        if( u.u.m & CL_LONG_MAX )
+            return FP_ILOGBNAN;
         return INT_MAX;
@@ -244,10 +232,7 @@
 static void GET_BITS_SP32(float fx, unsigned int* ux)
-    volatile union {
-        float f;
-        unsigned int u;
-    } _bitsy;
+    volatile union {float f; unsigned int u;} _bitsy;
     _bitsy.f = (fx);
     *ux = _bitsy.u;
@@ -259,10 +244,7 @@
 /* } */
 static void PUT_BITS_SP32(unsigned int ux, float* fx)
-    volatile union {
-        float f;
-        unsigned int u;
-    } _bitsy;
+    volatile union {float f; unsigned int u;} _bitsy;
     _bitsy.u = (ux);
     *fx = _bitsy.f;
@@ -274,19 +256,13 @@
 /* } */
 static void GET_BITS_DP64(double dx, unsigned __int64* lx)
-    volatile union {
-        double d;
-        unsigned __int64 l;
-    } _bitsy;
+    volatile union {double d; unsigned __int64 l;} _bitsy;
     _bitsy.d = (dx);
     *lx = _bitsy.l;
 static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
-    volatile union {
-        double d;
-        unsigned __int64 l;
-    } _bitsy;
+    volatile union {double d; unsigned __int64 l;} _bitsy;
     _bitsy.l = (lx);
     *dx = _bitsy.d;
@@ -311,7 +287,8 @@
    that x is NaN; gcc does. */
 double fmax(double x, double y)
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
     return x >= y ? x : y;
@@ -324,15 +301,17 @@
 double fmin(double x, double y)
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
     return x <= y ? x : y;
-float fmaxf(float x, float y)
+float fmaxf( float x, float y )
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
     return x >= y ? x : y;
@@ -344,31 +323,31 @@
 float fminf(float x, float y)
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
     return x <= y ? x : y;
 long double scalblnl(long double x, long n)
-    union {
+    union
+    {
         long double d;
-        struct
-        {
-            cl_ulong m;
-            cl_ushort sexp;
-        } u;
-    } u;
+        struct{ cl_ulong m; cl_ushort sexp;}u;
+    }u;
     u.u.m = CL_LONG_MIN;
-    if (x == 0.0L || n < -2200) return copysignl(0.0L, x);
+    if( x == 0.0L || n < -2200)
+        return copysignl( 0.0L, x );
-    if (n > 2200) return INFINITY;
+    if( n > 2200 )
+        return INFINITY;
-    if (n < 0)
+    if( n < 0 )
         u.u.sexp = 0x3fff - 1022;
-        while (n <= -1022)
+        while( n <= -1022 )
             x *= u.d;
             n += 1022;
@@ -378,10 +357,10 @@
         return x;
-    if (n > 0)
+    if( n > 0 )
         u.u.sexp = 0x3fff + 1023;
-        while (n >= 1023)
+        while( n >= 1023 )
             x *= u.d;
             n -= 1023;
@@ -399,12 +378,15 @@
 //                          log2
-const static cl_double log_e_base2 = 1.4426950408889634074;
-const static cl_double log_10_base2 = 3.3219280948873623478;
+const static cl_double log_e_base2   = 1.4426950408889634074;
+const static cl_double log_10_base2  = 3.3219280948873623478;
-// double log10(double x);
+//double log10(double x);
-double log2(double x) { return 1.44269504088896340735992468100189214 * log(x); }
+double log2(double x)
+    return 1.44269504088896340735992468100189214 * log(x);
 long double log2l(long double x)
@@ -415,23 +397,23 @@
     double absx = fabs(x);
-    if (absx < 4503599627370496.0 /* 0x1.0p52f */)
+    if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
         cl_long rounded = x;
-        x = copysign((double)rounded, x);
+        x = copysign( (double) rounded, x );
     return x;
-float truncf(float x)
+float  truncf(float x)
     float absx = fabsf(x);
-    if (absx < 8388608.0f /* 0x1.0p23f */)
+    if( absx < 8388608.0f /* 0x1.0p23f */ )
         cl_int rounded = x;
-        x = copysignf((float)rounded, x);
+        x = copysignf( (float) rounded, x );
     return x;
@@ -441,69 +423,75 @@
     double absx = fabs(x);
-    if (absx < 0.5) return 0;
+    if( absx < 0.5 )
+        return 0;
-    if (absx < 4503599627370496.0 /* 0x1.0p52 */)
+    if( absx < 4503599627370496.0 /* 0x1.0p52 */)
         absx += 0.5;
         cl_long rounded = absx;
         absx = rounded;
-        x = copysign(absx, x);
+        x = copysign( absx, x );
-    if (x >= (double)LONG_MAX) return LONG_MAX;
+    if( x >= (double) LONG_MAX )
+        return LONG_MAX;
-    return (long)x;
+    return (long) x;
 long lroundf(float x)
     float absx = fabsf(x);
-    if (absx < 0.5f) return 0;
+    if( absx < 0.5f )
+        return 0;
-    if (absx < 8388608.0f)
+    if( absx < 8388608.0f )
         absx += 0.5f;
         cl_int rounded = absx;
         absx = rounded;
-        x = copysignf(absx, x);
+        x = copysignf(  absx, x );
-    if (x >= (float)LONG_MAX) return LONG_MAX;
+    if( x >= (float) LONG_MAX )
+        return LONG_MAX;
-    return (long)x;
+    return (long) x;
 double round(double x)
     double absx = fabs(x);
-    if (absx < 0.5) return copysign(0.0, x);
+    if( absx < 0.5 )
+        return copysign( 0.0, x);
-    if (absx < 4503599627370496.0 /* 0x1.0p52 */)
+    if( absx < 4503599627370496.0 /* 0x1.0p52 */)
         absx += 0.5;
         cl_long rounded = absx;
         absx = rounded;
-        x = copysign(absx, x);
+        x = copysign( absx, x );
     return x;
-float roundf(float x)
+float  roundf(float x)
     float absx = fabsf(x);
-    if (absx < 0.5f) return copysignf(0.0f, x);
+    if( absx < 0.5f )
+        return copysignf( 0.0f, x);
-    if (absx < 8388608.0f)
+    if( absx < 8388608.0f )
         absx += 0.5f;
         cl_int rounded = absx;
         absx = rounded;
-        x = copysignf(absx, x);
+        x = copysignf( absx, x );
     return x;
@@ -513,59 +501,65 @@
     long double absx = fabsl(x);
-    if (absx < 0.5L) return copysignl(0.0L, x);
+    if( absx < 0.5L )
+        return copysignl( 0.0L, x);
-    if (absx < 9223372036854775808.0L /*0x1.0p63L*/)
+    if( absx < 9223372036854775808.0L /*0x1.0p63L*/ )
         absx += 0.5L;
         cl_ulong rounded = absx;
         absx = rounded;
-        x = copysignl(absx, x);
+        x = copysignl( absx, x );
     return x;
-float cbrtf(float x)
+float cbrtf( float x )
-    float z = pow(fabs((double)x), 1.0 / 3.0);
-    return copysignf(z, x);
+    float z = pow( fabs((double) x), 1.0 / 3.0 );
+    return copysignf( z, x );
-double cbrt(double x) { return copysign(pow(fabs(x), 1.0 / 3.0), x); }
+double cbrt( double x )
+    return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
-long int lrint(double x)
+long int lrint (double x)
     double absx = fabs(x);
-    if (x >= (double)LONG_MAX) return LONG_MAX;
+    if( x >= (double) LONG_MAX )
+        return LONG_MAX;
-    if (absx < 4503599627370496.0 /* 0x1.0p52 */)
+    if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
-        double magic = copysign(4503599627370496.0 /* 0x1.0p52 */, x);
+        double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
         double rounded = x + magic;
         rounded -= magic;
-        return (long int)rounded;
+        return (long int) rounded;
-    return (long int)x;
+    return (long int) x;
-long int lrintf(float x)
+long int lrintf (float x)
     float absx = fabsf(x);
-    if (x >= (float)LONG_MAX) return LONG_MAX;
+    if( x >= (float) LONG_MAX )
+        return LONG_MAX;
-    if (absx < 8388608.0f /* 0x1.0p23f */)
+    if( absx < 8388608.0f /* 0x1.0p23f */ )
-        float magic = copysignf(8388608.0f /* 0x1.0p23f */, x);
+        float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
         float rounded = x + magic;
         rounded -= magic;
-        return (long int)rounded;
+        return (long int) rounded;
-    return (long int)x;
+    return (long int) x;
 #endif // _MSC_VER < 1900
@@ -580,12 +574,13 @@
 int fetestexcept(int excepts)
     unsigned int status = _statusfp();
-    return excepts
-        & (((status & _SW_INEXACT) ? FE_INEXACT : 0)
-           | ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0)
-           | ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0)
-           | ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0)
-           | ((status & _SW_INVALID) ? FE_INVALID : 0));
+    return excepts & (
+        ((status & _SW_INEXACT) ? FE_INEXACT : 0)      |
+        ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0)  |
+        ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0)    |
+        ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
+        ((status & _SW_INVALID) ? FE_INVALID : 0)
+    );
 int feclearexcept(int excepts)
@@ -597,36 +592,33 @@
 #endif // __INTEL_COMPILER
-#if _MSC_VER < 1900 && (!defined(__INTEL_COMPILER) || __INTEL_COMPILER < 1300)
+#if _MSC_VER < 1900 && ( ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300 )
-float nanf(const char* str)
+float nanf( const char* str)
-    cl_uint u = atoi(str);
+    cl_uint u = atoi( str );
     u |= 0x7fc00000U;
-    return *(float*)(&u);
+    return *( float*)(&u);
-double nan(const char* str)
+double nan( const char* str)
-    cl_ulong u = atoi(str);
+    cl_ulong u = atoi( str );
     u |= 0x7ff8000000000000ULL;
-    return *(double*)(&u);
+    return *( double*)(&u);
 // double check this implementatation
-long double nanl(const char* str)
+long double nanl( const char* str)
-    union {
+    union
+    {
         long double f;
-        struct
-        {
-            cl_ulong m;
-            cl_ushort sexp;
-        } u;
-    } u;
+        struct { cl_ulong m; cl_ushort sexp; }u;
+    }u;
     u.u.sexp = 0x7fff;
-    u.u.m = 0x8000000000000000ULL | atoi(str);
+    u.u.m = 0x8000000000000000ULL | atoi( str );
     return u.f;
@@ -640,35 +632,32 @@
-// This function is commented out because the Windows implementation should
-never call munmap.
+// This function is commented out because the Windows implementation should never call munmap.
 // If it is calling it, we have a bug. Please file a bugzilla.
 int munmap(void *addr, size_t len)
-// FIXME: this is not correct.  munmap is like free()
+// FIXME: this is not correct.  munmap is like free()
     return (int)VirtualAlloc( (LPVOID)addr, len,
-uint64_t ReadTime(void)
+uint64_t ReadTime( void )
     LARGE_INTEGER current;
     return (uint64_t)current.QuadPart;
-double SubtractTime(uint64_t endTime, uint64_t startTime)
+double SubtractTime( uint64_t endTime, uint64_t startTime )
     static double PerformanceFrequency = 0.0;
-    if (PerformanceFrequency == 0.0)
-    {
+    if (PerformanceFrequency == 0.0) {
         LARGE_INTEGER frequency;
-        PerformanceFrequency = (double)frequency.QuadPart;
+        PerformanceFrequency = (double) frequency.QuadPart;
     return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
@@ -676,38 +665,40 @@
 int cf_signbit(double x)
-    union {
+    union
+    {
         double f;
         cl_ulong u;
-    } u;
+    }u;
     u.f = x;
     return u.u >> 63;
 int cf_signbitf(float x)
-    union {
+    union
+    {
         float f;
         cl_uint u;
-    } u;
+    }u;
     u.f = x;
     return u.u >> 31;
-float int2float(int32_t ix)
+float int2float (int32_t ix)
     union {
-        float f;
+        float   f;
         int32_t i;
     } u;
     u.i = ix;
     return u.f;
-int32_t float2int(float fx)
+int32_t float2int (float   fx)
     union {
-        float f;
+        float   f;
         int32_t i;
     } u;
     u.f = fx;
@@ -731,50 +722,27 @@
     return 31 - res;
     unsigned long index;
-    unsigned char res = _BitScanReverse(&index, pattern);
-    if (res)
-    {
-        return 8 * sizeof(int) - 1 - index;
-    }
-    else
-    {
-        return 8 * sizeof(int);
+    unsigned char res = _BitScanReverse( &index, pattern);
+    if (res) {
+        return 8*sizeof(int) - 1 - index;
+    } else {
+        return 8*sizeof(int);
 int __builtin_clz(unsigned int pattern)
-    int count;
-    if (pattern == 0u)
-    {
-        return 32;
-    }
-    count = 31;
-    if (pattern >= 1u << 16)
-    {
-        pattern >>= 16;
-        count -= 16;
-    }
-    if (pattern >= 1u << 8)
-    {
-        pattern >>= 8;
-        count -= 8;
-    }
-    if (pattern >= 1u << 4)
-    {
-        pattern >>= 4;
-        count -= 4;
-    }
-    if (pattern >= 1u << 2)
-    {
-        pattern >>= 2;
-        count -= 2;
-    }
-    if (pattern >= 1u << 1)
-    {
-        count -= 1;
-    }
-    return count;
+   int count;
+   if (pattern == 0u) {
+       return 32;
+   }
+   count = 31;
+   if (pattern >= 1u<<16) { pattern >>= 16; count -= 16; }
+   if (pattern >=  1u<<8) { pattern >>=  8; count -=  8; }
+   if (pattern >=  1u<<4) { pattern >>=  4; count -=  4; }
+   if (pattern >=  1u<<2) { pattern >>=  2; count -=  2; }
+   if (pattern >=  1u<<1) {                 count -=  1; }
+   return count;
 #endif // !defined(_WIN64)
@@ -788,9 +756,9 @@
     return 0;
-unsigned int sleep(unsigned int sec)
+unsigned int sleep( unsigned int sec )
-    Sleep(sec * 1000);
+    Sleep( sec * 1000 );
     return 0;
diff --git a/test_common/harness/mt19937.cpp b/test_common/harness/mt19937.cpp
index c32d9ba..a4fbf59 100644
--- a/test_common/harness/mt19937.cpp
+++ b/test_common/harness/mt19937.cpp
@@ -26,8 +26,8 @@
@@ -51,15 +51,15 @@
 #include "harness/alloc.h"
 #ifdef __SSE2__
-#include <emmintrin.h>
+    #include <emmintrin.h>
 /* Period parameters */
-#define N 624 /* vector code requires multiple of 4 here */
+#define N 624   /* vector code requires multiple of 4 here */
 #define M 397
-#define MATRIX_A (cl_uint)0x9908b0dfUL /* constant vector a */
-#define UPPER_MASK (cl_uint)0x80000000UL /* most significant w-r bits */
-#define LOWER_MASK (cl_uint)0x7fffffffUL /* least significant r bits */
+#define MATRIX_A    (cl_uint) 0x9908b0dfUL   /* constant vector a */
+#define UPPER_MASK  (cl_uint) 0x80000000UL /* most significant w-r bits */
+#define LOWER_MASK  (cl_uint) 0x7fffffffUL /* least significant r bits */
 typedef struct _MTdata
@@ -67,27 +67,26 @@
 #ifdef __SSE2__
     cl_uint cache[N];
-    cl_int mti;
-} _MTdata;
+    cl_int  mti;
 /* initializes mt[N] with a seed */
 MTdata init_genrand(cl_uint s)
-    MTdata r = (MTdata)align_malloc(sizeof(_MTdata), 16);
-    if (NULL != r)
+    MTdata r = (MTdata) align_malloc( sizeof( _MTdata ), 16 );
+    if( NULL != r )
         cl_uint *mt = r->mt;
         int mti = 0;
-        mt[0] = s; // & 0xffffffffUL;
-        for (mti = 1; mti < N; mti++)
-        {
-            mt[mti] = (cl_uint)(
-                1812433253UL * (mt[mti - 1] ^ (mt[mti - 1] >> 30)) + mti);
+        mt[0]= s; // & 0xffffffffUL;
+        for (mti=1; mti<N; mti++) {
+            mt[mti] = (cl_uint)
+            (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
             /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
             /* In the previous versions, MSBs of the seed affect   */
             /* only MSBs of the array mt[].                        */
             /* 2002/01/09 modified by Makoto Matsumoto             */
-            // mt[mti] &= 0xffffffffUL;
+    //        mt[mti] &= 0xffffffffUL;
             /* for >32 bit machines */
         r->mti = mti;
@@ -96,22 +95,20 @@
     return r;
-void free_mtdata(MTdata d)
+void    free_mtdata( MTdata d )
-    if (d) align_free(d);
+    if(d)
+        align_free(d);
 /* generates a random number on [0,0xffffffff]-interval */
-cl_uint genrand_int32(MTdata d)
+cl_uint genrand_int32( MTdata d)
     /* mag01[x] = x * MATRIX_A  for x=0,1 */
-    static const cl_uint mag01[2] = { 0x0UL, MATRIX_A };
+    static const cl_uint mag01[2]={0x0UL, MATRIX_A};
 #ifdef __SSE2__
     static volatile int init = 0;
-    static union {
-        __m128i v;
-        cl_uint s[4];
-    } upper_mask, lower_mask, one, matrix_a, c0, c1;
+    static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1;
@@ -123,17 +120,14 @@
         int kk;
 #ifdef __SSE2__
-        if (0 == init)
+        if( 0 == init )
-            upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] =
-                upper_mask.s[3] = UPPER_MASK;
-            lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] =
-                lower_mask.s[3] = LOWER_MASK;
+            upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK;
+            lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK;
             one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1;
-            matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] =
-                MATRIX_A;
-            c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint)0x9d2c5680UL;
-            c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint)0xefc60000UL;
+            matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A;
+            c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL;
+            c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL;
             init = 1;
@@ -141,89 +135,61 @@
         kk = 0;
 #ifdef __SSE2__
         // vector loop
-        for (; kk + 4 <= N - M; kk += 4)
+        for( ; kk + 4 <= N-M; kk += 4 )
-            // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
-            __m128i vy = _mm_or_si128(
-                _mm_and_si128(_mm_load_si128((__m128i *)(mt + kk)),
-                              upper_mask.v),
-                _mm_and_si128(_mm_loadu_si128((__m128i *)(mt + kk + 1)),
-                              lower_mask.v));
+            __m128i vy = _mm_or_si128(  _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
+                                        _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v ));        //  ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
-            // y & 1 ? -1 : 0
-            __m128i mask = _mm_cmpeq_epi32(_mm_and_si128(vy, one.v), one.v);
-            // y & 1 ? MATRIX_A, 0    =  mag01[y & (cl_uint) 0x1UL]
-            __m128i vmag01 = _mm_and_si128(mask, matrix_a.v);
-            // mt[kk+M] ^ (y >> 1)
-            __m128i vr =
-                _mm_xor_si128(_mm_loadu_si128((__m128i *)(mt + kk + M)),
-                              (__m128i)_mm_srli_epi32(vy, 1));
-            // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
-            vr = _mm_xor_si128(vr, vmag01);
-            _mm_store_si128((__m128i *)(mt + kk), vr);
+            __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v );                                         // y & 1 ? -1 : 0
+            __m128i vmag01 = _mm_and_si128( mask, matrix_a.v );                                                         // y & 1 ? MATRIX_A, 0    =  mag01[y & (cl_uint) 0x1UL]
+            __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) );    // mt[kk+M] ^ (y >> 1)
+            vr = _mm_xor_si128( vr, vmag01 );                                                                           // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
+            _mm_store_si128( (__m128i*) (mt + kk ), vr );
-        for (; kk < N - M; kk++)
-        {
-            y = (cl_uint)((mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK));
-            mt[kk] = mt[kk + M] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
+        for ( ;kk<N-M;kk++) {
+            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
+            mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
 #ifdef __SSE2__
         // advance to next aligned location
-        for (; kk < N - 1 && (kk & 3); kk++)
-        {
-            y = (cl_uint)((mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK));
-            mt[kk] = mt[kk + (M - N)] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
+        for (;kk<N-1 && (kk & 3);kk++) {
+            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
+            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
         // vector loop
-        for (; kk + 4 <= N - 1; kk += 4)
+        for( ; kk + 4 <= N-1; kk += 4 )
-            __m128i vy = _mm_or_si128(
-                _mm_and_si128(_mm_load_si128((__m128i *)(mt + kk)),
-                              upper_mask.v),
-                // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
-                _mm_and_si128(_mm_loadu_si128((__m128i *)(mt + kk + 1)),
-                              lower_mask.v));
+            __m128i vy = _mm_or_si128(  _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
+                                        _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v ));        //  ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
-            // y & 1 ? -1 : 0
-            __m128i mask = _mm_cmpeq_epi32(_mm_and_si128(vy, one.v), one.v);
-            // y & 1 ? MATRIX_A, 0    =  mag01[y & (cl_uint) 0x1UL]
-            __m128i vmag01 = _mm_and_si128(mask, matrix_a.v);
-            // mt[kk+M-N] ^ (y >> 1)
-            __m128i vr =
-                _mm_xor_si128(_mm_loadu_si128((__m128i *)(mt + kk + M - N)),
-                              _mm_srli_epi32(vy, 1));
-            // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
-            vr = _mm_xor_si128(vr, vmag01);
-            _mm_store_si128((__m128i *)(mt + kk), vr);
+            __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v );                                         // y & 1 ? -1 : 0
+            __m128i vmag01 = _mm_and_si128( mask, matrix_a.v );                                                         // y & 1 ? MATRIX_A, 0    =  mag01[y & (cl_uint) 0x1UL]
+            __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) );          // mt[kk+M-N] ^ (y >> 1)
+            vr = _mm_xor_si128( vr, vmag01 );                                                                           // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
+            _mm_store_si128( (__m128i*) (mt + kk ), vr );
-        for (; kk < N - 1; kk++)
-        {
-            y = (cl_uint)((mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK));
-            mt[kk] = mt[kk + (M - N)] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
+        for (;kk<N-1;kk++) {
+            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
+            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
-        y = (cl_uint)((mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK));
-        mt[N - 1] = mt[M - 1] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
+        y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK));
+        mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
 #ifdef __SSE2__
         // Do the tempering ahead of time in vector code
-        for (kk = 0; kk + 4 <= N; kk += 4)
+        for( kk = 0; kk + 4 <= N; kk += 4 )
-            // y = mt[k];
-            __m128i vy = _mm_load_si128((__m128i *)(mt + kk));
-            // y ^= (y >> 11);
-            vy = _mm_xor_si128(vy, _mm_srli_epi32(vy, 11));
-            // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
-            vy = _mm_xor_si128(vy, _mm_and_si128(_mm_slli_epi32(vy, 7), c0.v));
-            // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
-            vy = _mm_xor_si128(vy, _mm_and_si128(_mm_slli_epi32(vy, 15), c1.v));
-            // y ^= (y >> 18);
-            vy = _mm_xor_si128(vy, _mm_srli_epi32(vy, 18));
-            _mm_store_si128((__m128i *)(d->cache + kk), vy);
+            __m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) );                            // y = mt[k];
+            vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) );                             // y ^= (y >> 11);
+            vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) );        // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
+            vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) );       // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
+            vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) );                             // y ^= (y >> 18);
+            _mm_store_si128( (__m128i*)(d->cache+kk), vy );
@@ -236,8 +202,8 @@
     /* Tempering */
     y ^= (y >> 11);
-    y ^= (y << 7) & (cl_uint)0x9d2c5680UL;
-    y ^= (y << 15) & (cl_uint)0xefc60000UL;
+    y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
+    y ^= (y << 15) & (cl_uint) 0xefc60000UL;
     y ^= (y >> 18);
@@ -245,35 +211,35 @@
     return y;
-cl_ulong genrand_int64(MTdata d)
+cl_ulong genrand_int64( MTdata d)
-    return ((cl_ulong)genrand_int32(d) << 32) | (cl_uint)genrand_int32(d);
+    return ((cl_ulong) genrand_int32(d) << 32) | (cl_uint) genrand_int32(d);
 /* generates a random number on [0,1]-real-interval */
 double genrand_real1(MTdata d)
-    return genrand_int32(d) * (1.0 / 4294967295.0);
+    return genrand_int32(d)*(1.0/4294967295.0);
     /* divided by 2^32-1 */
 /* generates a random number on [0,1)-real-interval */
 double genrand_real2(MTdata d)
-    return genrand_int32(d) * (1.0 / 4294967296.0);
+    return genrand_int32(d)*(1.0/4294967296.0);
     /* divided by 2^32 */
 /* generates a random number on (0,1)-real-interval */
 double genrand_real3(MTdata d)
-    return (((double)genrand_int32(d)) + 0.5) * (1.0 / 4294967296.0);
+    return (((double)genrand_int32(d)) + 0.5)*(1.0/4294967296.0);
     /* divided by 2^32 */
 /* generates a random number on [0,1) with 53-bit resolution*/
 double genrand_res53(MTdata d)
-    unsigned long a = genrand_int32(d) >> 5, b = genrand_int32(d) >> 6;
-    return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
+    unsigned long a=genrand_int32(d)>>5, b=genrand_int32(d)>>6;
+    return(a*67108864.0+b)*(1.0/9007199254740992.0);
diff --git a/test_common/harness/mt19937.h b/test_common/harness/mt19937.h
index 35c8493..85786e7 100644
--- a/test_common/harness/mt19937.h
+++ b/test_common/harness/mt19937.h
@@ -31,8 +31,8 @@
@@ -47,12 +47,12 @@
 #ifndef MT19937_H
-#define MT19937_H 1
+#define MT19937_H   1
-#if defined(__APPLE__)
-#include <OpenCL/cl_platform.h>
+#if defined( __APPLE__ )
+    #include <OpenCL/cl_platform.h>
-#include <CL/cl_platform.h>
+    #include <CL/cl_platform.h>
@@ -61,50 +61,52 @@
  *      on each thread.
-typedef struct _MTdata *MTdata;
+typedef struct _MTdata  *MTdata;
 /* Create the random number generator with seed */
-MTdata init_genrand(cl_uint /*seed*/);
+MTdata init_genrand( cl_uint /*seed*/ );
 /* release memory used by a MTdata private data */
-void free_mtdata(MTdata /*data*/);
+void   free_mtdata( MTdata /*data*/ );
 /* generates a random number on [0,0xffffffff]-interval */
-cl_uint genrand_int32(MTdata /*data*/);
+cl_uint genrand_int32( MTdata /*data*/);
 /* generates a random number on [0,0xffffffffffffffffULL]-interval */
-cl_ulong genrand_int64(MTdata /*data*/);
+cl_ulong genrand_int64( MTdata /*data*/);
 /* generates a random number on [0,1]-real-interval */
-double genrand_real1(MTdata /*data*/);
+double genrand_real1( MTdata /*data*/);
 /* generates a random number on [0,1)-real-interval */
-double genrand_real2(MTdata /*data*/);
+double genrand_real2( MTdata /*data*/);
 /* generates a random number on (0,1)-real-interval */
-double genrand_real3(MTdata /*data*/);
+double genrand_real3( MTdata /*data*/);
 /* generates a random number on [0,1) with 53-bit resolution*/
-double genrand_res53(MTdata /*data*/);
+double genrand_res53( MTdata /*data*/ );
 #ifdef __cplusplus
 #include <cassert>
-struct MTdataHolder
-    MTdataHolder(cl_uint seed)
-    {
+struct MTdataHolder {
+    MTdataHolder(cl_uint seed) {
         m_mtdata = init_genrand(seed);
         assert(m_mtdata != nullptr);
-    MTdataHolder(MTdata mtdata): m_mtdata(mtdata) {}
+    MTdataHolder(MTdata mtdata) : m_mtdata(mtdata) {}
-    ~MTdataHolder() { free_mtdata(m_mtdata); }
+    ~MTdataHolder() {
+        free_mtdata(m_mtdata);
+    }
-    operator MTdata() const { return m_mtdata; }
+    operator MTdata () const {
+        return m_mtdata;
+    }
     MTdata m_mtdata;
@@ -112,4 +114,4 @@
 #endif // #ifdef __cplusplus
-#endif /* MT19937_H */
+#endif  /* MT19937_H */
diff --git a/test_common/harness/os_helpers.cpp b/test_common/harness/os_helpers.cpp
index cd350cf..00e7a6b 100644
--- a/test_common/harness/os_helpers.cpp
+++ b/test_common/harness/os_helpers.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -20,10 +20,10 @@
 // C++ interface.
 // =================================================================================================
-#include <cerrno> // errno, error constants
-#include <climits> // PATH_MAX
-#include <cstdlib> // abort, _splitpath, _makepath
-#include <cstring> // strdup, strerror_r
+#include <cerrno>     // errno, error constants
+#include <climits>    // PATH_MAX
+#include <cstdlib>    // abort, _splitpath, _makepath
+#include <cstring>    // strdup, strerror_r
 #include <sstream>
 #include <vector>
@@ -32,141 +32,131 @@
 #include <android/api-level.h>
-#define CHECK_PTR(ptr)                                                         \
-    if ((ptr) == NULL)                                                         \
-    {                                                                          \
-        abort();                                                               \
+#define CHECK_PTR( ptr )    \
+    if ( (ptr) == NULL ) {  \
+        abort();            \
-typedef std::vector<char> buffer_t;
+typedef std::vector< char > buffer_t;
-#if !defined(PATH_MAX)
-#define PATH_MAX 1000
+#if ! defined( PATH_MAX )
+    #define PATH_MAX 1000
-int const _size = PATH_MAX + 1; // Initial buffer size for path.
-int const _count = 8; // How many times we will try to double buffer size.
+int const _size  = PATH_MAX + 1;    // Initial buffer size for path.
+int const _count = 8;               // How many times we will try to double buffer size.
 // -------------------------------------------------------------------------------------------------
 // MacOS X
 // -------------------------------------------------------------------------------------------------
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
-#include <mach-o/dyld.h> // _NSGetExecutablePath
-#include <libgen.h> // dirname
+    #include <mach-o/dyld.h>    // _NSGetExecutablePath
+    #include <libgen.h>         // dirname
-static std::string
-_err_msg(int err, // Error number (e. g. errno).
-         int level // Nesting level, for avoiding infinite recursion.
+    static
+    std::string
+    _err_msg(
+        int err,     // Error number (e. g. errno).
+        int level    // Nesting level, for avoiding infinite recursion.
+    ) {
-    /*
-        There are 3 incompatible versions of strerror_r:
+        /*
+            There are 3 incompatible versions of strerror_r:
-            char * strerror_r( int, char *, size_t );  // GNU version
-            int    strerror_r( int, char *, size_t );  // BSD version
-            int    strerror_r( int, char *, size_t );  // XSI version
+                char * strerror_r( int, char *, size_t );  // GNU version
+                int    strerror_r( int, char *, size_t );  // BSD version
+                int    strerror_r( int, char *, size_t );  // XSI version
-        BSD version returns error code, while XSI version returns 0 or -1 and
-       sets errno.
+            BSD version returns error code, while XSI version returns 0 or -1 and sets errno.
-    */
+        */
-    // BSD version of strerror_r.
-    buffer_t buffer(100);
-    int count = _count;
-    for (;;)
-    {
-        int rc = strerror_r(err, &buffer.front(), buffer.size());
-        if (rc == EINVAL)
-        {
-            // Error code is not recognized, but anyway we got the message.
-            return &buffer.front();
-        }
-        else if (rc == ERANGE)
-        {
-            // Buffer is not enough.
-            if (count > 0)
-            {
-                // Enlarge the buffer.
-                --count;
-                buffer.resize(buffer.size() * 2);
-            }
-            else
-            {
+        // BSD version of strerror_r.
+        buffer_t buffer( 100 );
+        int      count = _count;
+        for ( ; ; ) {
+            int rc = strerror_r( err, & buffer.front(), buffer.size() );
+            if ( rc == EINVAL ) {
+                // Error code is not recognized, but anyway we got the message.
+                return & buffer.front();
+            } else if ( rc == ERANGE ) {
+                // Buffer is not enough.
+                if ( count > 0 ) {
+                    // Enlarge the buffer.
+                    -- count;
+                    buffer.resize( buffer.size() * 2 );
+                } else {
+                    std::stringstream ostr;
+                    ostr
+                        << "Error " << err << " "
+                        << "(Getting error message failed: "
+                        << "Buffer of " << buffer.size() << " bytes is still too small"
+                        << ")";
+                    return ostr.str();
+                }; // if
+            } else if ( rc == 0 ) {
+                // We got the message.
+                return & buffer.front();
+            } else {
                 std::stringstream ostr;
-                ostr << "Error " << err << " "
-                     << "(Getting error message failed: "
-                     << "Buffer of " << buffer.size()
-                     << " bytes is still too small"
-                     << ")";
+                ostr
+                    << "Error " << err << " "
+                    << "(Getting error message failed: "
+                    << ( level < 2 ? _err_msg( rc, level + 1 ) : "Oops" )
+                    << ")";
                 return ostr.str();
             }; // if
-        }
-        else if (rc == 0)
-        {
-            // We got the message.
-            return &buffer.front();
-        }
-        else
-        {
-            std::stringstream ostr;
-            ostr << "Error " << err << " "
-                 << "(Getting error message failed: "
-                 << (level < 2 ? _err_msg(rc, level + 1) : "Oops") << ")";
-            return ostr.str();
-        }; // if
-    }; // forever
+        }; // forever
-} // _err_msg
+    } // _err_msg
-std::string dir_sep() { return "/"; } // dir_sep
+    std::string
+    dir_sep(
+    ) {
+        return "/";
+    } // dir_sep
-std::string exe_path()
-    buffer_t path(_size);
-    int count = _count;
-    for (;;)
-    {
-        uint32_t size = path.size();
-        int rc = _NSGetExecutablePath(&path.front(), &size);
-        if (rc == 0)
-        {
-            break;
-        }; // if
-        if (count > 0)
-        {
-            --count;
-            path.resize(size);
-        }
-        else
-        {
-            log_error("ERROR: Getting executable path failed: "
-                      "_NSGetExecutablePath failed: Buffer of %lu bytes is "
-                      "still too small\n",
-                      (unsigned long)path.size());
-            exit(2);
-        }; // if
-    }; // forever
-    return &path.front();
-} // exe_path
+    std::string
+    exe_path(
+    ) {
+        buffer_t path( _size );
+        int      count = _count;
+        for ( ; ; ) {
+            uint32_t size = path.size();
+            int rc = _NSGetExecutablePath( & path.front(), & size );
+            if ( rc == 0 ) {
+                break;
+            }; // if
+            if ( count > 0 ) {
+                -- count;
+                path.resize( size );
+            } else {
+                log_error(
+                    "ERROR: Getting executable path failed: "
+                    "_NSGetExecutablePath failed: Buffer of %lu bytes is still too small\n",
+                    (unsigned long) path.size()
+                );
+                exit( 2 );
+            }; // if
+        }; // forever
+        return & path.front();
+    } // exe_path
-std::string exe_dir()
-    std::string path = exe_path();
-    // We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its
-    // argument.
-    buffer_t buffer(path.c_str(),
-                    path.c_str() + path.size() + 1); // Copy with trailing zero.
-    return dirname(&buffer.front());
-} // exe_dir
+    std::string
+    exe_dir(
+    ) {
+        std::string path = exe_path();
+        // We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
+        buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
+        return dirname( & buffer.front() );
+    } // exe_dir
 #endif // __APPLE__
@@ -175,153 +165,149 @@
 // Linux
 // -------------------------------------------------------------------------------------------------
-#if defined(__linux__)
+#if defined( __linux__ )
-#include <cerrno> // errno
-#include <libgen.h> // dirname
-#include <unistd.h> // readlink
+    #include <cerrno>      // errno
+    #include <libgen.h>    // dirname
+    #include <unistd.h>    // readlink
-static std::string _err_msg(int err, int level)
+    static
+    std::string
+    _err_msg(
+        int err,
+        int level
+    ) {
-    /*
-        There are 3 incompatible versions of strerror_r:
+        /*
+            There are 3 incompatible versions of strerror_r:
-            char * strerror_r( int, char *, size_t );  // GNU version
-            int    strerror_r( int, char *, size_t );  // BSD version
-            int    strerror_r( int, char *, size_t );  // XSI version
+                char * strerror_r( int, char *, size_t );  // GNU version
+                int    strerror_r( int, char *, size_t );  // BSD version
+                int    strerror_r( int, char *, size_t );  // XSI version
-        BSD version returns error code, while XSI version returns 0 or -1 and
-       sets errno.
+            BSD version returns error code, while XSI version returns 0 or -1 and sets errno.
-    */
+        */
-#if (defined(__ANDROID__) && __ANDROID_API__ < 23)                             \
-    || ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE)
+        #if (defined(__ANDROID__) && __ANDROID_API__ < 23) || ( ( _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 ) && ! _GNU_SOURCE )
-// XSI version of strerror_r.
-#warning Not tested!
-    buffer_t buffer(200);
-    int count = _count;
-    for (;;)
-    {
-        int rc = strerror_r(err, &buffer.front(), buffer.size());
-        if (rc == -1)
-        {
-            int _err = errno;
-            if (_err == ERANGE)
-            {
-                if (count > 0)
-                {
-                    // Enlarge the buffer.
-                    --count;
-                    buffer.resize(buffer.size() * 2);
-                }
-                else
-                {
-                    std::stringstream ostr;
-                    ostr << "Error " << err << " "
-                         << "(Getting error message failed: "
-                         << "Buffer of " << buffer.size()
-                         << " bytes is still too small"
-                         << ")";
-                    return ostr.str();
+            // XSI version of strerror_r.
+            #warning Not tested!
+            buffer_t buffer( 200 );
+            int      count = _count;
+            for ( ; ; ) {
+                int rc = strerror_r( err, & buffer.front(), buffer.size() );
+                if ( rc == -1 ) {
+                    int _err = errno;
+                    if ( _err == ERANGE ) {
+                        if ( count > 0 ) {
+                            // Enlarge the buffer.
+                            -- count;
+                            buffer.resize( buffer.size() * 2 );
+                        } else {
+                            std::stringstream ostr;
+                            ostr
+                                << "Error " << err << " "
+                                << "(Getting error message failed: "
+                                << "Buffer of " << buffer.size() << " bytes is still too small"
+                                << ")";
+                            return ostr.str();
+                        }; // if
+                    } else {
+                        std::stringstream ostr;
+                        ostr
+                            << "Error " << err << " "
+                            << "(Getting error message failed: "
+                            << ( level < 2 ? _err_msg( _err, level + 1 ) : "Oops" )
+                            << ")";
+                        return ostr.str();
+                    }; // if
+                } else {
+                    // We got the message.
+                    return & buffer.front();
                 }; // if
-            }
-            else
-            {
-                std::stringstream ostr;
-                ostr << "Error " << err << " "
-                     << "(Getting error message failed: "
-                     << (level < 2 ? _err_msg(_err, level + 1) : "Oops") << ")";
-                return ostr.str();
+            }; // forever
+        #else
+            // GNU version of strerror_r.
+            char buffer[ 2000 ];
+            return strerror_r( err, buffer, sizeof( buffer ) );
+        #endif
+    } // _err_msg
+    std::string
+    dir_sep(
+    ) {
+        return "/";
+    } // dir_sep
+    std::string
+    exe_path(
+    ) {
+        static std::string const exe = "/proc/self/exe";
+        buffer_t    path( _size );
+        int         count = _count;  // Max number of iterations.
+        for ( ; ; ) {
+            ssize_t len = readlink( exe.c_str(), & path.front(), path.size() );
+            if ( len < 0 ) {
+                // Oops.
+                int err = errno;
+                log_error(
+                    "ERROR: Getting executable path failed: "
+                    "Reading symlink `%s' failed: %s\n",
+                    exe.c_str(), err_msg( err ).c_str()
+                );
+                exit( 2 );
             }; // if
-        }
-        else
-        {
-            // We got the message.
-            return &buffer.front();
-        }; // if
-    }; // forever
+            if ( len < path.size() ) {
+                // We got the path.
+                path.resize( len );
+                break;
+            }; // if
-    // GNU version of strerror_r.
-    char buffer[2000];
-    return strerror_r(err, buffer, sizeof(buffer));
+            // Oops, buffer is too small.
+            if ( count > 0 ) {
+                -- count;
+                // Enlarge the buffer.
+                path.resize( path.size() * 2 );
+            } else {
+                log_error(
+                    "ERROR: Getting executable path failed: "
+                    "Reading symlink `%s' failed: Buffer of %lu bytes is still too small\n",
+                    exe.c_str(),
+                    (unsigned long) path.size()
+                );
+                exit( 2 );
+            }; // if
+        }; // forever
-} // _err_msg
+        return std::string( & path.front(), path.size() );
+    } // exe_path
-std::string dir_sep() { return "/"; } // dir_sep
-std::string exe_path()
-    static std::string const exe = "/proc/self/exe";
-    buffer_t path(_size);
-    int count = _count; // Max number of iterations.
-    for (;;)
-    {
-        ssize_t len = readlink(exe.c_str(), &path.front(), path.size());
-        if (len < 0)
-        {
-            // Oops.
-            int err = errno;
-            log_error("ERROR: Getting executable path failed: "
-                      "Reading symlink `%s' failed: %s\n",
-                      exe.c_str(), err_msg(err).c_str());
-            exit(2);
-        }; // if
-        if (len < path.size())
-        {
-            // We got the path.
-            path.resize(len);
-            break;
-        }; // if
-        // Oops, buffer is too small.
-        if (count > 0)
-        {
-            --count;
-            // Enlarge the buffer.
-            path.resize(path.size() * 2);
-        }
-        else
-        {
-            log_error("ERROR: Getting executable path failed: "
-                      "Reading symlink `%s' failed: Buffer of %lu bytes is "
-                      "still too small\n",
-                      exe.c_str(), (unsigned long)path.size());
-            exit(2);
-        }; // if
-    }; // forever
-    return std::string(&path.front(), path.size());
-} // exe_path
-std::string exe_dir()
-    std::string path = exe_path();
-    // We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its
-    // argument.
-    buffer_t buffer(path.c_str(),
-                    path.c_str() + path.size() + 1); // Copy with trailing zero.
-    return dirname(&buffer.front());
-} // exe_dir
+    std::string
+    exe_dir(
+    ) {
+        std::string path = exe_path();
+        // We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
+        buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
+        return dirname( & buffer.front() );
+    } // exe_dir
 #endif // __linux__
@@ -329,196 +315,212 @@
 // MS Windows
 // -------------------------------------------------------------------------------------------------
-#if defined(_WIN32)
+#if defined( _WIN32 )
-#include <windows.h>
-#if defined(max)
-#undef max
+    #include <windows.h>
+    #if defined( max )
+        #undef max
+    #endif
-#include <cctype>
-#include <algorithm>
+    #include <cctype>
+    #include <algorithm>
-static std::string _err_msg(int err, int level)
+    static
+    std::string
+    _err_msg(
+        int err,
+        int level
+    ) {
-    std::string msg;
+        std::string msg;
-    LPSTR buffer = NULL;
+        LPSTR  buffer = NULL;
+        DWORD  flags  =
-    DWORD len = FormatMessageA(flags, NULL, err, LANG_USER_DEFAULT,
-                               reinterpret_cast<LPSTR>(&buffer), 0, NULL);
-    if (buffer == NULL || len == 0)
-    {
-        int _err = GetLastError();
-        char str[1024] = { 0 };
-        snprintf(str, sizeof(str),
-                 "Error 0x%08x (Getting error message failed: %s )", err,
-                 (level < 2 ? _err_msg(_err, level + 1).c_str() : "Oops"));
-        msg = std::string(str);
-    }
-    else
-    {
-        // Trim trailing whitespace (including `\r' and `\n').
-        while (len > 0 && isspace(buffer[len - 1]))
-        {
-            --len;
-        }; // while
-        // Drop trailing full stop.
-        if (len > 0 && buffer[len - 1] == '.')
-        {
-            --len;
-        }; // if
-        msg.assign(buffer, len);
-    }; // if
-    if (buffer != NULL)
-    {
-        LocalFree(buffer);
-    }; // if
-    return msg;
-} // _get_err_msg
-std::string dir_sep() { return "\\"; } // dir_sep
-std::string exe_path()
-    buffer_t path(_size);
-    int count = _count;
-    for (;;)
-    {
-        DWORD len = GetModuleFileNameA(NULL, &path.front(), path.size());
-        if (len == 0)
-        {
-            int err = GetLastError();
-            log_error("ERROR: Getting executable path failed: %s\n",
-                      err_msg(err).c_str());
-            exit(2);
-        }; // if
-        if (len < path.size())
-        {
-            path.resize(len);
-            break;
-        }; // if
-        // Buffer too small.
-        if (count > 0)
-        {
-            --count;
-            path.resize(path.size() * 2);
-        }
-        else
-        {
-            log_error("ERROR: Getting executable path failed: "
-                      "Buffer of %lu bytes is still too small\n",
-                      (unsigned long)path.size());
-            exit(2);
-        }; // if
-    }; // forever
-    return std::string(&path.front(), path.size());
-} // exe_path
-std::string exe_dir()
-    std::string exe = exe_path();
-    int count = 0;
-    // Splitting path into components.
-    buffer_t drv(_MAX_DRIVE);
-    buffer_t dir(_MAX_DIR);
-    count = _count;
-#if defined(_MSC_VER)
-    for (;;)
-    {
-        int rc =
-            _splitpath_s(exe.c_str(), &drv.front(), drv.size(), &dir.front(),
-                         dir.size(), NULL, 0, // We need neither name
-                         NULL, 0 // nor extension
+        DWORD len =
+            FormatMessageA(
+                flags,
+                NULL,
+                err,
+                LANG_USER_DEFAULT,
+                reinterpret_cast< LPSTR >( & buffer ),
+                0,
+                NULL
-        if (rc == 0)
-        {
-            break;
-        }
-        else if (rc == ERANGE)
-        {
-            if (count > 0)
-            {
-                --count;
-                // Buffer is too small, but it is not clear which one.
-                // So we have to enlarge all.
-                drv.resize(drv.size() * 2);
-                dir.resize(dir.size() * 2);
-            }
-            else
-            {
-                log_error("ERROR: Getting executable path failed: "
-                          "Splitting path `%s' to components failed: "
-                          "Buffers of %lu and %lu bytes are still too small\n",
-                          exe.c_str(), (unsigned long)drv.size(),
-                          (unsigned long)dir.size());
-                exit(2);
+        if ( buffer == NULL || len == 0 ) {
+            int _err = GetLastError();
+            char str[1024] = { 0 };
+            snprintf(str, sizeof(str), "Error 0x%08x (Getting error message failed: %s )", err, ( level < 2 ? _err_msg( _err, level + 1 ).c_str() : "Oops" ));
+            msg = std::string(str);
+        } else {
+            // Trim trailing whitespace (including `\r' and `\n').
+            while ( len > 0 && isspace( buffer[ len - 1 ] ) ) {
+                -- len;
+            }; // while
+            // Drop trailing full stop.
+            if ( len > 0 && buffer[ len - 1 ] == '.' ) {
+                -- len;
             }; // if
-        }
-        else
-        {
-            log_error("ERROR: Getting executable path failed: "
-                      "Splitting path `%s' to components failed: %s\n",
-                      exe.c_str(), err_msg(rc).c_str());
-            exit(2);
+            msg.assign( buffer, len );
+        }; //if
+        if ( buffer != NULL ) {
+            LocalFree( buffer );
         }; // if
-    }; // forever
+        return msg;
+    } // _get_err_msg
+    std::string
+    dir_sep(
+    ) {
+        return "\\";
+    } // dir_sep
+    std::string
+    exe_path(
+    ) {
+        buffer_t path( _size );
+        int      count = _count;
+        for ( ; ; ) {
+            DWORD len = GetModuleFileNameA( NULL, & path.front(), path.size() );
+            if ( len == 0 ) {
+                int err = GetLastError();
+                log_error( "ERROR: Getting executable path failed: %s\n", err_msg( err ).c_str() );
+                exit( 2 );
+            }; // if
+            if ( len < path.size() ) {
+                path.resize( len );
+                break;
+            }; // if
+            // Buffer too small.
+            if ( count > 0 ) {
+                -- count;
+                path.resize( path.size() * 2 );
+            } else {
+                log_error(
+                    "ERROR: Getting executable path failed: "
+                    "Buffer of %lu bytes is still too small\n",
+                    (unsigned long) path.size()
+                );
+                exit( 2 );
+            }; // if
+        }; // forever
+        return std::string( & path.front(), path.size() );
+    } // exe_path
+    std::string
+    exe_dir(
+    ) {
+        std::string exe = exe_path();
+        int count = 0;
+        // Splitting path into components.
+        buffer_t drv( _MAX_DRIVE );
+        buffer_t dir( _MAX_DIR   );
+        count = _count;
+#if defined(_MSC_VER)
+        for ( ; ; ) {
+            int rc =
+                _splitpath_s(
+                    exe.c_str(),
+                    & drv.front(), drv.size(),
+                    & dir.front(), dir.size(),
+                    NULL, 0,   // We need neither name
+                    NULL, 0    // nor extension
+                );
+            if ( rc == 0 ) {
+                break;
+            } else if ( rc == ERANGE ) {
+                if ( count > 0 ) {
+                    -- count;
+                    // Buffer is too small, but it is not clear which one.
+                    // So we have to enlarge all.
+                    drv.resize( drv.size() * 2 );
+                    dir.resize( dir.size() * 2 );
+                } else {
+                    log_error(
+                        "ERROR: Getting executable path failed: "
+                        "Splitting path `%s' to components failed: "
+                        "Buffers of %lu and %lu bytes are still too small\n",
+                        exe.c_str(),
+                        (unsigned long) drv.size(),
+                        (unsigned long) dir.size()
+                    );
+                    exit( 2 );
+                }; // if
+            } else {
+                log_error(
+                    "ERROR: Getting executable path failed: "
+                    "Splitting path `%s' to components failed: %s\n",
+                    exe.c_str(),
+                    err_msg( rc ).c_str()
+                );
+                exit( 2 );
+            }; // if
+        }; // forever
 #else // __MINGW32__
-    // MinGW does not have the "secure" _splitpath_s, use the insecure version
-    // instead.
-    _splitpath(exe.c_str(), &drv.front(), &dir.front(),
-               NULL, // We need neither name
-               NULL // nor extension
-    );
+        // MinGW does not have the "secure" _splitpath_s, use the insecure version instead.
+        _splitpath(
+            exe.c_str(),
+            & drv.front(),
+            & dir.front(),
+            NULL,   // We need neither name
+            NULL    // nor extension
+        );
 #endif // __MINGW32__
-    // Combining components back to path.
-    // I failed with "secure" `_makepath_s'. If buffer is too small, instead of
-    // returning ERANGE, `_makepath_s' pops up dialog box and offers to debug
-    // the program. D'oh! So let us try to guess the size of result and go with
-    // insecure `_makepath'.
-    buffer_t path(std::max(drv.size() + dir.size(), size_t(_MAX_PATH)) + 10);
-    _makepath(&path.front(), &drv.front(), &dir.front(), NULL, NULL);
+        // Combining components back to path.
+        // I failed with "secure" `_makepath_s'. If buffer is too small, instead of returning
+        // ERANGE, `_makepath_s' pops up dialog box and offers to debug the program. D'oh!
+        // So let us try to guess the size of result and go with insecure `_makepath'.
+        buffer_t path( std::max( drv.size() + dir.size(), size_t( _MAX_PATH ) ) + 10 );
+        _makepath( & path.front(), & drv.front(), & dir.front(), NULL, NULL );
-    return &path.front();
+        return & path.front();
-} // exe_dir
+    } // exe_dir
 #endif // _WIN32
-std::string err_msg(int err) { return _err_msg(err, 0); } // err_msg
+    int err
+) {
+    return _err_msg( err, 0 );
+} // err_msg
 // =================================================================================================
@@ -526,34 +528,39 @@
 // =================================================================================================
-char* get_err_msg(int err)
-    char* msg = strdup(err_msg(err).c_str());
-    CHECK_PTR(msg);
+char *
+    int err
+) {
+    char * msg = strdup( err_msg( err ).c_str() );
+    CHECK_PTR( msg );
     return msg;
 } // get_err_msg
-char* get_dir_sep()
-    char* sep = strdup(dir_sep().c_str());
-    CHECK_PTR(sep);
+char *
+) {
+    char * sep = strdup( dir_sep().c_str() );
+    CHECK_PTR( sep );
     return sep;
 } // get_dir_sep
-char* get_exe_path()
-    char* path = strdup(exe_path().c_str());
-    CHECK_PTR(path);
+char *
+) {
+    char * path = strdup( exe_path().c_str() );
+    CHECK_PTR( path );
     return path;
 } // get_exe_path
-char* get_exe_dir()
-    char* dir = strdup(exe_dir().c_str());
-    CHECK_PTR(dir);
+char *
+) {
+    char * dir = strdup( exe_dir().c_str() );
+    CHECK_PTR( dir );
     return dir;
 } // get_exe_dir
diff --git a/test_common/harness/os_helpers.h b/test_common/harness/os_helpers.h
index aa3080d..7c4463f 100644
--- a/test_common/harness/os_helpers.h
+++ b/test_common/harness/os_helpers.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -24,12 +24,12 @@
 #ifdef __cplusplus
-#include <string>
+    #include <string>
-std::string err_msg(int err);
-std::string dir_sep();
-std::string exe_path();
-std::string exe_dir();
+    std::string err_msg( int err );
+    std::string dir_sep();
+    std::string exe_path();
+    std::string exe_dir();
 #endif // __cplusplus
@@ -37,9 +37,9 @@
 // C interface.
 // -------------------------------------------------------------------------------------------------
-char* get_err_msg(int err); // Returns system error message. Subject to free.
-char* get_dir_sep(); // Returns dir separator. Subject to free.
-char* get_exe_path(); // Returns path of current executable. Subject to free.
-char* get_exe_dir(); // Returns dir of current executable. Subject to free.
+char * get_err_msg( int err );  // Returns system error message. Subject to free.
+char * get_dir_sep();           // Returns dir separator. Subject to free.
+char * get_exe_path();          // Returns path of current executable. Subject to free.
+char * get_exe_dir();           // Returns dir of current executable. Subject to free.
 #endif // __os_helpers_h__
diff --git a/test_common/harness/parseParameters.cpp b/test_common/harness/parseParameters.cpp
index b2ab5b0..1706730 100644
--- a/test_common/harness/parseParameters.cpp
+++ b/test_common/harness/parseParameters.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -29,58 +29,47 @@
 #define DEFAULT_COMPILATION_PROGRAM "cl_offline_compiler"
-CompilationMode gCompilationMode = kOnline;
+CompilationMode      gCompilationMode = kOnline;
 CompilationCacheMode gCompilationCacheMode = kCacheModeCompileIfAbsent;
-std::string gCompilationCachePath = ".";
-std::string gCompilationProgram = DEFAULT_COMPILATION_PROGRAM;
+std::string          gCompilationCachePath = ".";
+std::string          gCompilationProgram = DEFAULT_COMPILATION_PROGRAM;
-void helpInfo()
+void helpInfo ()
-    log_info(
-        R"(Common options:
-    -h, --help
-        This help
-    --compilation-mode <mode>
-        Specify a compilation mode.  Mode can be:
-            online     Use online compilation (default)
-            binary     Use binary offline compilation
-            spir-v     Use SPIR-V offline compilation
-For offline compilation (binary and spir-v modes) only:
-    --compilation-cache-mode <cache-mode>
-        Specify a compilation caching mode:
-            compile-if-absent
-                Read from cache if already populated, or else perform
-                offline compilation (default)
-            force-read
-                Force reading from the cache
-            overwrite
-                Disable reading from the cache
-            dump-cl-files
-                Dumps the .cl and build .options files used by the test suite
-    --compilation-cache-path <path>
-        Path for offline compiler output and CL source
-    --compilation-program <prog>
-        Program to use for offline compilation, defaults to:
-            )" DEFAULT_COMPILATION_PROGRAM "\n\n");
+    log_info("Common options:\n"
+             "        -h, --help                  This help\n"
+             "        --compilation-mode <mode>   Specify a compilation mode.  Mode can be:\n"
+             "                           online     Use online compilation (default)\n"
+             "                           binary     Use binary offline compilation\n"
+             "                           spir-v     Use SPIR-V offline compilation\n"
+             "\n"
+             "    For offline compilation (binary and spir-v modes) only:\n"
+             "        --compilation-cache-mode <cache-mode>  Specify a compilation caching mode:\n"
+             "                                 compile-if-absent  Read from cache if already populated, or\n"
+             "                                                    else perform offline compilation (default)\n"
+             "                                 force-read        Force reading from the cache\n"
+             "                                 overwrite         Disable reading from the cache\n"
+             "                                 dump-cl-files     Dumps the .cl and build .options files used by the test suite\n"
+             "        --compilation-cache-path <path>   Path for offline compiler output and CL source\n"
+             "        --compilation-program <prog>      Program to use for offline compilation,\n"
+             "                                          defaults to " DEFAULT_COMPILATION_PROGRAM "\n"
+             "\n");
-int parseCustomParam(int argc, const char *argv[], const char *ignore)
+int parseCustomParam (int argc, const char *argv[], const char *ignore)
     int delArg = 0;
-    for (int i = 1; i < argc; i++)
+    for (int i=1; i<argc; i++)
-        if (ignore != 0)
+        if(ignore != 0)
-            // skip parameters that require special/different treatment in
-            // application (generic interpretation and parameter removal will
-            // not be performed)
-            const char *ptr = strstr(ignore, argv[i]);
-            if (ptr != 0 && (ptr == ignore || ptr[-1] == ' ')
-                && // first on list or ' ' before
-                (ptr[strlen(argv[i])] == 0
-                 || ptr[strlen(argv[i])] == ' ')) // last on list or ' ' after
+            // skip parameters that require special/different treatment in application
+            // (generic interpretation and parameter removal will not be performed)
+            const char * ptr = strstr(ignore, argv[i]);
+            if(ptr != 0 &&
+               (ptr == ignore || ptr[-1] == ' ') && //first on list or ' ' before
+               (ptr[strlen(argv[i])] == 0 || ptr[strlen(argv[i])] == ' ')) // last on list or ' ' after
@@ -91,7 +80,7 @@
             // Note: we don't increment delArg to delete this argument,
             // to allow the caller's argument parsing routine to see the
             // option and print its own help.
-            helpInfo();
+            helpInfo ();
         else if (!strcmp(argv[i], "--compilation-mode"))
@@ -153,18 +142,15 @@
-                    log_error("Compilation cache mode not recognized: %s\n",
-                              mode);
+                    log_error("Compilation cache mode not recognized: %s\n", mode);
                     return -1;
                 log_info("Compilation cache mode specified: %s\n", mode);
-                log_error(
-                    "Compilation cache mode parameters are incorrect. Usage:\n"
-                    "  --compilation-cache-mode "
-                    "<compile-if-absent|force-read|overwrite>\n");
+                log_error("Compilation cache mode parameters are incorrect. Usage:\n"
+                          "  --compilation-cache-mode <compile-if-absent|force-read|overwrite>\n");
                 return -1;
@@ -178,8 +164,7 @@
-                log_error("Path argument for --compilation-cache-path was not "
-                          "specified.\n");
+                log_error("Path argument for --compilation-cache-path was not specified.\n");
                 return -1;
@@ -193,34 +178,34 @@
-                log_error("Program argument for --compilation-program was not "
-                          "specified.\n");
+                log_error("Program argument for --compilation-program was not specified.\n");
                 return -1;
-        // cleaning parameters from argv tab
-        for (int j = i; j < argc - delArg; j++) argv[j] = argv[j + delArg];
+        //cleaning parameters from argv tab
+        for (int j = i; j < argc - delArg; j++)
+            argv[j] = argv[j + delArg];
         argc -= delArg;
         i -= delArg;
-    if ((gCompilationCacheMode == kCacheModeForceRead
-         || gCompilationCacheMode == kCacheModeOverwrite)
-        && gCompilationMode == kOnline)
+    if ((gCompilationCacheMode == kCacheModeForceRead || gCompilationCacheMode == kCacheModeOverwrite)
+         && gCompilationMode == kOnline)
-        log_error("Compilation cache mode can only be specified when using an "
-                  "offline compilation mode.\n");
+        log_error("Compilation cache mode can only be specified when using an offline compilation mode.\n");
         return -1;
     return argc;
-bool is_power_of_two(int number) { return number && !(number & (number - 1)); }
+bool is_power_of_two(int number)
+    return number && !(number & (number - 1));
-extern void parseWimpyReductionFactor(const char *&arg,
-                                      int &wimpyReductionFactor)
+extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor)
     const char *arg_temp = strchr(&arg[1], ']');
     if (arg_temp != 0)
@@ -229,15 +214,12 @@
         arg = arg_temp; // Advance until ']'
         if (is_power_of_two(new_factor))
-            log_info("\n Wimpy reduction factor changed from %d to %d \n",
-                     wimpyReductionFactor, new_factor);
+            log_info("\n Wimpy reduction factor changed from %d to %d \n", wimpyReductionFactor, new_factor);
             wimpyReductionFactor = new_factor;
-            log_info("\n WARNING: Incorrect wimpy reduction factor %d, must be "
-                     "power of 2. The default value will be used.\n",
-                     new_factor);
+            log_info("\n WARNING: Incorrect wimpy reduction factor %d, must be power of 2. The default value will be used.\n", new_factor);
diff --git a/test_common/harness/parseParameters.h b/test_common/harness/parseParameters.h
index b0f8328..5dc28c5 100644
--- a/test_common/harness/parseParameters.h
+++ b/test_common/harness/parseParameters.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -39,10 +39,8 @@
 extern std::string gCompilationCachePath;
 extern std::string gCompilationProgram;
-extern int parseCustomParam(int argc, const char *argv[],
-                            const char *ignore = 0);
+extern int parseCustomParam (int argc, const char *argv[], const char *ignore = 0 );
-extern void parseWimpyReductionFactor(const char *&arg,
-                                      int &wimpyReductionFactor);
+extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor);
 #endif // _parseParameters_h
diff --git a/test_common/harness/propertyHelpers.cpp b/test_common/harness/propertyHelpers.cpp
deleted file mode 100644
index 3157ca8..0000000
--- a/test_common/harness/propertyHelpers.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "propertyHelpers.h"
-#include "errorHelpers.h"
-#include <assert.h>
-#include <algorithm>
-#include <vector>
-static bool findProperty(const std::vector<cl_properties>& props,
-                         cl_properties prop, cl_properties& value)
-    // This function assumes properties are valid:
-    assert(props.size() == 0 || props.back() == 0);
-    assert(props.size() == 0 || props.size() % 2 == 1);
-    for (cl_uint i = 0; i < props.size(); i = i + 2)
-    {
-        cl_properties check_prop = props[i];
-        if (check_prop == 0)
-        {
-            break;
-        }
-        if (check_prop == prop)
-        {
-            value = props[i + 1];
-            return true;
-        }
-    }
-    return false;
-int compareProperties(const std::vector<cl_properties>& queried,
-                      const std::vector<cl_properties>& check)
-    if (queried.size() != 0)
-    {
-        if (queried.back() != 0)
-        {
-            log_error("ERROR: queried properties do not end with 0!\n");
-            return TEST_FAIL;
-        }
-        if (queried.size() % 2 != 1)
-        {
-            log_error("ERROR: queried properties does not consist of "
-                      "property-value pairs!\n");
-            return TEST_FAIL;
-        }
-    }
-    if (check.size() != 0)
-    {
-        if (check.back() != 0)
-        {
-            log_error("ERROR: check properties do not end with 0!\n");
-            return TEST_FAIL;
-        }
-        if (check.size() % 2 != 1)
-        {
-            log_error("ERROR: check properties does not consist of "
-                      "property-value pairs!\n");
-            return TEST_FAIL;
-        }
-    }
-    if (queried != check)
-    {
-        for (cl_uint i = 0; i < check.size(); i = i + 2)
-        {
-            cl_properties check_prop = check[i];
-            if (check_prop == 0)
-            {
-                break;
-            }
-            cl_properties check_value = check[i + 1];
-            cl_properties queried_value = 0;
-            bool found = findProperty(queried, check_prop, queried_value);
-            if (!found)
-            {
-                log_error("ERROR: expected property 0x%x not found!\n",
-                          check_prop);
-                return TEST_FAIL;
-            }
-            else if (check_value != queried_value)
-            {
-                log_error("ERROR: mis-matched value for property 0x%x: wanted "
-                          "0x%x, got 0x%x\n",
-                          check_prop, check_value, queried_value);
-                return TEST_FAIL;
-            }
-        }
-        if (queried.size() > check.size())
-        {
-            log_error("ERROR: all properties found but there are extra "
-                      "properties: expected %d, got %d.\n",
-                      check.size(), queried.size());
-            return TEST_FAIL;
-        }
-        log_error("ERROR: properties were returned in the wrong order.\n");
-        return TEST_FAIL;
-    }
-    return TEST_PASS;
diff --git a/test_common/harness/propertyHelpers.h b/test_common/harness/propertyHelpers.h
deleted file mode 100644
index 68d16f6..0000000
--- a/test_common/harness/propertyHelpers.h
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef _propertyHelpers_h
-#define _propertyHelpers_h
-#include "compat.h"
-#include "testHarness.h"
-#include <vector>
-int compareProperties(const std::vector<cl_properties>& queried,
-                      const std::vector<cl_properties>& check);
-#endif // _propertyHelpers_h
diff --git a/test_common/harness/ref_counting.h b/test_common/harness/ref_counting.h
index cd6a316..1a2acee 100644
--- a/test_common/harness/ref_counting.h
+++ b/test_common/harness/ref_counting.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,39 +16,34 @@
 #ifndef _ref_counting_h
 #define _ref_counting_h
-#define MARK_REF_COUNT_BASE(c, type, bigType)                                  \
-    cl_uint c##_refCount;                                                      \
-    error = clGet##type##Info(c, CL_##bigType##_REFERENCE_COUNT,               \
-                              sizeof(c##_refCount), &c##_refCount, NULL);      \
-    test_error(error, "Unable to check reference count for " #type);
+#define MARK_REF_COUNT_BASE( c, type, bigType ) \
+    cl_uint c##_refCount; \
+    error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount ), &c##_refCount, NULL ); \
+    test_error( error, "Unable to check reference count for " #type );
-#define TEST_REF_COUNT_BASE(c, type, bigType)                                  \
-    cl_uint c##_refCount_new;                                                  \
-    error =                                                                    \
-        clGet##type##Info(c, CL_##bigType##_REFERENCE_COUNT,                   \
-                          sizeof(c##_refCount_new), &c##_refCount_new, NULL);  \
-    test_error(error, "Unable to check reference count for " #type);           \
-    if (c##_refCount != c##_refCount_new)                                      \
-    {                                                                          \
-        log_error("ERROR: Reference count for " #type                          \
-                  " changed! (was %d, now %d)\n",                              \
-                  c##_refCount, c##_refCount_new);                             \
-        return -1;                                                             \
+#define TEST_REF_COUNT_BASE( c, type, bigType ) \
+    cl_uint c##_refCount_new; \
+    error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount_new ), &c##_refCount_new, NULL ); \
+    test_error( error, "Unable to check reference count for " #type ); \
+    if( c##_refCount != c##_refCount_new ) \
+    {    \
+        log_error( "ERROR: Reference count for " #type " changed! (was %d, now %d)\n", c##_refCount, c##_refCount_new );    \
+        return -1; \
+#define MARK_REF_COUNT_QUEUE( c ) MARK_REF_COUNT_BASE( c, CommandQueue, QUEUE )
+#define TEST_REF_COUNT_QUEUE( c ) TEST_REF_COUNT_BASE( c, CommandQueue, QUEUE )
+#define MARK_REF_COUNT_MEM( c ) MARK_REF_COUNT_BASE( c, MemObject, MEM )
+#define TEST_REF_COUNT_MEM( c ) TEST_REF_COUNT_BASE( c, MemObject, MEM )
 #endif // _ref_counting_h
diff --git a/test_common/harness/rounding_mode.cpp b/test_common/harness/rounding_mode.cpp
index 681ccdd..ff38a7e 100644
--- a/test_common/harness/rounding_mode.cpp
+++ b/test_common/harness/rounding_mode.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -15,49 +15,46 @@
 #include "rounding_mode.h"
-#if (defined(__arm__) || defined(__aarch64__))
-#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode
-#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode:
+#if (defined( __arm__ ) || defined(__aarch64__))
+    #define FPSCR_FZ    (1 << 24)       // Flush-To-Zero mode
+    #define FPSCR_ROUND_MASK (3 << 22)  // Rounding mode:
-#define _ARM_FE_FTZ 0x1000000
-#define _ARM_FE_NFTZ 0x0
-#if defined(__aarch64__)
-#define _FPU_GETCW(cw) __asm__("MRS %0,FPCR" : "=r"(cw))
-#define _FPU_SETCW(cw) __asm__("MSR FPCR,%0" : : "ri"(cw))
-#define _FPU_GETCW(cw) __asm__("VMRS %0,FPSCR" : "=r"(cw))
-#define _FPU_SETCW(cw) __asm__("VMSR FPSCR,%0" : : "ri"(cw))
+    #define _ARM_FE_FTZ     0x1000000
+    #define _ARM_FE_NFTZ    0x0
+    #if defined(__aarch64__)
+        #define _FPU_GETCW(cw) __asm__ ("MRS %0,FPCR" : "=r" (cw))
+        #define _FPU_SETCW(cw) __asm__ ("MSR FPCR,%0" : :"ri" (cw))
+    #else
+        #define _FPU_GETCW(cw) __asm__ ("VMRS %0,FPSCR" : "=r" (cw))
+        #define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw))
+    #endif
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
-#define _ARM_FE_TONEAREST 0x0
-#define _ARM_FE_UPWARD 0x400000
-#define _ARM_FE_DOWNWARD 0x800000
-#define _ARM_FE_TOWARDZERO 0xc00000
-RoundingMode set_round(RoundingMode r, Type outType)
+#if (defined( __arm__ ) || defined(__aarch64__)) && defined( __GNUC__ )
+#define _ARM_FE_TONEAREST           0x0
+#define _ARM_FE_UPWARD              0x400000
+#define _ARM_FE_DOWNWARD            0x800000
+#define _ARM_FE_TOWARDZERO          0xc00000
+RoundingMode set_round( RoundingMode r, Type outType )
-    static const int flt_rounds[kRoundingModeCount] = {
-    };
-    static const int int_rounds[kRoundingModeCount] = {
-    };
+    static const int flt_rounds[ kRoundingModeCount ] = { _ARM_FE_TONEAREST,
+                                                          _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
+    static const int int_rounds[ kRoundingModeCount ] = { _ARM_FE_TOWARDZERO,
+                                                          _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
     const int *p = int_rounds;
-    if (outType == kfloat || outType == kdouble) p = flt_rounds;
+    if( outType == kfloat || outType == kdouble )
+        p = flt_rounds;
     int fpscr = 0;
     RoundingMode oldRound = get_round();
-    _FPU_SETCW(p[r] | (fpscr & ~FPSCR_ROUND_MASK));
+    _FPU_SETCW( p[r] | (fpscr & ~FPSCR_ROUND_MASK));
     return oldRound;
-RoundingMode get_round(void)
+RoundingMode get_round( void )
     int fpscr;
     int oldRound;
@@ -65,192 +62,180 @@
     oldRound = (fpscr & FPSCR_ROUND_MASK);
-    switch (oldRound)
+    switch( oldRound )
-        case _ARM_FE_TONEAREST: return kRoundToNearestEven;
-        case _ARM_FE_UPWARD: return kRoundUp;
-        case _ARM_FE_DOWNWARD: return kRoundDown;
-        case _ARM_FE_TOWARDZERO: return kRoundTowardZero;
+        case _ARM_FE_TONEAREST:
+            return kRoundToNearestEven;
+        case _ARM_FE_UPWARD:
+            return kRoundUp;
+        case _ARM_FE_DOWNWARD:
+            return kRoundDown;
+        case _ARM_FE_TOWARDZERO:
+            return kRoundTowardZero;
     return kDefaultRoundingMode;
 #elif !(defined(_WIN32) && defined(_MSC_VER))
-RoundingMode set_round(RoundingMode r, Type outType)
+RoundingMode set_round( RoundingMode r, Type outType )
-    static const int flt_rounds[kRoundingModeCount] = {
-    };
-    static const int int_rounds[kRoundingModeCount] = {
-    };
+    static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
+    static const int int_rounds[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
     const int *p = int_rounds;
-    if (outType == kfloat || outType == kdouble) p = flt_rounds;
+    if( outType == kfloat || outType == kdouble )
+        p = flt_rounds;
     int oldRound = fegetround();
-    fesetround(p[r]);
+    fesetround( p[r] );
-    switch (oldRound)
+    switch( oldRound )
-        case FE_TONEAREST: return kRoundToNearestEven;
-        case FE_UPWARD: return kRoundUp;
-        case FE_DOWNWARD: return kRoundDown;
-        case FE_TOWARDZERO: return kRoundTowardZero;
-        default: abort(); // ??!
+        case FE_TONEAREST:
+            return kRoundToNearestEven;
+        case FE_UPWARD:
+            return kRoundUp;
+        case FE_DOWNWARD:
+            return kRoundDown;
+        case FE_TOWARDZERO:
+            return kRoundTowardZero;
+        default:
+            abort();    // ??!
-    return kDefaultRoundingMode; // never happens
+    return kDefaultRoundingMode;    //never happens
-RoundingMode get_round(void)
+RoundingMode get_round( void )
     int oldRound = fegetround();
-    switch (oldRound)
+    switch( oldRound )
-        case FE_TONEAREST: return kRoundToNearestEven;
-        case FE_UPWARD: return kRoundUp;
-        case FE_DOWNWARD: return kRoundDown;
-        case FE_TOWARDZERO: return kRoundTowardZero;
+        case FE_TONEAREST:
+            return kRoundToNearestEven;
+        case FE_UPWARD:
+            return kRoundUp;
+        case FE_DOWNWARD:
+            return kRoundDown;
+        case FE_TOWARDZERO:
+            return kRoundTowardZero;
     return kDefaultRoundingMode;
-RoundingMode set_round(RoundingMode r, Type outType)
+RoundingMode set_round( RoundingMode r, Type outType )
-    static const int flt_rounds[kRoundingModeCount] = { _RC_NEAR, _RC_NEAR,
-                                                        _RC_UP, _RC_DOWN,
-                                                        _RC_CHOP };
-    static const int int_rounds[kRoundingModeCount] = { _RC_CHOP, _RC_NEAR,
-                                                        _RC_UP, _RC_DOWN,
-                                                        _RC_CHOP };
-    const int *p =
-        (outType == kfloat || outType == kdouble) ? flt_rounds : int_rounds;
+    static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
+    static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
+    const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds;
     unsigned int oldRound;
-    int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound
-    if (err)
-    {
-        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n",
-                   __FILE__, __LINE__);
-        return kDefaultRoundingMode; // what else never happens
+    int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
+    if (err) {
+        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
+        return kDefaultRoundingMode;    //what else never happens
     oldRound &= _MCW_RC;
-    RoundingMode old = (oldRound == _RC_NEAR)
-        ? kRoundToNearestEven
-        : (oldRound == _RC_UP) ? kRoundUp
-                               : (oldRound == _RC_DOWN)
-                ? kRoundDown
-                : (oldRound == _RC_CHOP) ? kRoundTowardZero
-                                         : kDefaultRoundingMode;
+    RoundingMode old =
+        (oldRound == _RC_NEAR)? kRoundToNearestEven :
+        (oldRound == _RC_UP)?   kRoundUp :
+        (oldRound == _RC_DOWN)? kRoundDown :
+        (oldRound == _RC_CHOP)? kRoundTowardZero:
+        kDefaultRoundingMode;
-    _controlfp_s(&oldRound, p[r], _MCW_RC); // setting new rounding mode
-    return old; // returning old rounding mode
+    _controlfp_s(&oldRound, p[r], _MCW_RC); //setting new rounding mode
+    return old;    //returning old rounding mode
-RoundingMode get_round(void)
+RoundingMode get_round( void )
     unsigned int oldRound;
-    int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound
+    int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
     oldRound &= _MCW_RC;
-    return (oldRound == _RC_NEAR)
-        ? kRoundToNearestEven
-        : (oldRound == _RC_UP) ? kRoundUp
-                               : (oldRound == _RC_DOWN)
-                ? kRoundDown
-                : (oldRound == _RC_CHOP) ? kRoundTowardZero
-                                         : kDefaultRoundingMode;
+    return
+        (oldRound == _RC_NEAR)? kRoundToNearestEven :
+        (oldRound == _RC_UP)?   kRoundUp :
+        (oldRound == _RC_DOWN)? kRoundDown :
+        (oldRound == _RC_CHOP)? kRoundTowardZero:
+        kDefaultRoundingMode;
-// FlushToZero() sets the host processor into ftz mode.  It is intended to have
-// a remote effect on the behavior of the code in basic_test_conversions.c. Some
-// host processors may not support this mode, which case you'll need to do some
-// clamping in software by testing against FLT_MIN or DBL_MIN in that file.
+// FlushToZero() sets the host processor into ftz mode.  It is intended to have a remote effect on the behavior of the code in
+// basic_test_conversions.c. Some host processors may not support this mode, which case you'll need to do some clamping in
+// software by testing against FLT_MIN or DBL_MIN in that file.
-// Note: IEEE-754 says conversions are basic operations.  As such they do *NOT*
-// have the behavior in section 7.5.3 of the OpenCL spec. They *ALWAYS* flush to
-// zero for subnormal inputs or outputs when FTZ mode is on like other basic
+// Note: IEEE-754 says conversions are basic operations.  As such they do *NOT* have the behavior in section 7.5.3 of
+// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic
 // operators do (e.g. add, subtract, multiply, divide, etc.)
 // Configuring hardware to FTZ mode varies by platform.
-// CAUTION: Some C implementations may also fail to behave properly in this
-// mode.
+// CAUTION: Some C implementations may also fail to behave properly in this mode.
 //  On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
-//  On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR --
-//  provided that SSE/SSE2
-//          is used for floating point computation! If your OS uses x87, you'll
-//          need to figure out how to turn that off for the conversions code in
-//          basic_test_conversions.c so that they flush to zero properly.
-//          Otherwise, you'll need to add appropriate software clamping to
-//          basic_test_conversions.c in which case, these function are at
-//          liberty to do nothing.
+//  On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2
+//          is used for floating point computation! If your OS uses x87, you'll need to figure out how
+//          to turn that off for the conversions code in basic_test_conversions.c so that they flush to
+//          zero properly.  Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
+//          in which case, these function are at liberty to do nothing.
-#if defined(__i386__) || defined(__x86_64__) || defined(_WIN32)
-#include <xmmintrin.h>
-#elif defined(__PPC__)
-#include <fpu_control.h>
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
+    #include <xmmintrin.h>
+#elif defined( __PPC__ )
+    #include <fpu_control.h>
-void *FlushToZero(void)
+void *FlushToZero( void )
-#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
-#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
-    union {
-        int i;
-        void *p;
-    } u = { _mm_getcsr() };
-    _mm_setcsr(u.i | 0x8040);
-    return u.p;
-#elif defined(__arm__) || defined(__aarch64__)
-    int fpscr;
-    _FPU_GETCW(fpscr);
-    _FPU_SETCW(fpscr | FPSCR_FZ);
-    return NULL;
-#elif defined(__PPC__)
-    fpu_control_t flags = 0;
-    _FPU_GETCW(flags);
-    flags |= _FPU_MASK_NI;
-    _FPU_SETCW(flags);
-    return NULL;
+#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
+    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
+        union{ int i;  void *p; }u = { _mm_getcsr() };
+        _mm_setcsr( u.i | 0x8040 );
+        return u.p;
+    #elif defined( __arm__ ) || defined(__aarch64__)
+        int fpscr;
+        _FPU_GETCW(fpscr);
+        _FPU_SETCW(fpscr | FPSCR_FZ);
+        return NULL;
+    #elif defined( __PPC__ )
+        fpu_control_t flags = 0;
+        _FPU_GETCW(flags);
+        flags |= _FPU_MASK_NI;
+        _FPU_SETCW(flags);
+        return NULL;
+        #else
+        #error Unknown arch
+    #endif
-#error Unknown arch
-#error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
+    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
-// Undo the effects of FlushToZero above, restoring the host to default
-// behavior, using the information passed in p.
-void UnFlushToZero(void *p)
+// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p.
+void UnFlushToZero( void *p)
-#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
-#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
-    union {
-        void *p;
-        int i;
-    } u = { p };
-    _mm_setcsr(u.i);
-#elif defined(__arm__) || defined(__aarch64__)
-    int fpscr;
-    _FPU_GETCW(fpscr);
-    _FPU_SETCW(fpscr & ~FPSCR_FZ);
-#elif defined(__PPC__)
-    fpu_control_t flags = 0;
-    _FPU_GETCW(flags);
-    flags &= ~_FPU_MASK_NI;
-    _FPU_SETCW(flags);
+#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
+    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
+        union{ void *p; int i;  }u = { p };
+        _mm_setcsr( u.i );
+    #elif defined( __arm__ ) || defined(__aarch64__)
+        int fpscr;
+        _FPU_GETCW(fpscr);
+        _FPU_SETCW(fpscr & ~FPSCR_FZ);
+    #elif defined( __PPC__)
+        fpu_control_t flags = 0;
+        _FPU_GETCW(flags);
+        flags &= ~_FPU_MASK_NI;
+        _FPU_SETCW(flags);
+        #else
+        #error Unknown arch
+    #endif
-#error Unknown arch
-#error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
+    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
diff --git a/test_common/harness/rounding_mode.h b/test_common/harness/rounding_mode.h
index 064a3a6..8c0e8a1 100644
--- a/test_common/harness/rounding_mode.h
+++ b/test_common/harness/rounding_mode.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,11 +16,9 @@
 #ifndef __ROUNDING_MODE_H__
 #define __ROUNDING_MODE_H__
 #include "compat.h"
-#if (defined(_WIN32) && defined(_MSC_VER))
+#if (defined(_WIN32) && defined (_MSC_VER))
 #include "errorHelpers.h"
 #include "testHarness.h"
@@ -34,7 +32,7 @@
-} RoundingMode;
 typedef enum
@@ -49,14 +47,15 @@
     kulong = 8,
     klong = 9,
-    // This goes last
+    //This goes last
-} Type;
-extern RoundingMode set_round(RoundingMode r, Type outType);
-extern RoundingMode get_round(void);
-extern void *FlushToZero(void);
-extern void UnFlushToZero(void *p);
+extern RoundingMode set_round( RoundingMode r, Type outType );
+extern RoundingMode get_round( void );
+extern void *FlushToZero( void );
+extern void UnFlushToZero( void *p);
 #endif /* __ROUNDING_MODE_H__ */
diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp
index 1aec3d0..04f12c7 100644
--- a/test_common/harness/testHarness.cpp
+++ b/test_common/harness/testHarness.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017-2019 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -15,7 +15,6 @@
 #include "testHarness.h"
 #include "compat.h"
-#include <algorithm>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -35,13 +34,13 @@
 #include <unistd.h>
-#if defined(__APPLE__)
+#if !defined(_WIN32) && !defined(__ANDROID__)
 #include <sys/sysctl.h>
 #include <time.h>
-#if !defined(__APPLE__)
+#if !defined (__APPLE__)
 #include <CL/cl.h>
@@ -52,114 +51,100 @@
 cl_uint gRandomSeed = 0;
 cl_uint gReSeed = 0;
-int gFlushDenormsToZero = 0;
-int gInfNanSupport = 1;
-int gIsEmbedded = 0;
-int gHasLong = 1;
-bool gCoreILProgram = true;
+int     gFlushDenormsToZero = 0;
+int     gInfNanSupport = 1;
+int     gIsEmbedded = 0;
+int     gIsOpenCL_C_1_0_Device = 0;
+int     gIsOpenCL_1_0_Device = 0;
+int     gHasLong = 1;
-#define DEFAULT_NUM_ELEMENTS 0x4000
+#define DEFAULT_NUM_ELEMENTS        0x4000
-int runTestHarness(int argc, const char *argv[], int testNum,
-                   test_definition testList[], int forceNoContextCreation,
-                   cl_command_queue_properties queueProps)
+int runTestHarness( int argc, const char *argv[], int testNum, test_definition testList[],
+                    int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps )
-    return runTestHarnessWithCheck(argc, argv, testNum, testList,
-                                   forceNoContextCreation, queueProps, NULL);
+    return runTestHarnessWithCheck( argc, argv, testNum, testList, forceNoContextCreation, queueProps,
+                          ( imageSupportRequired ) ? verifyImageSupport : NULL );
-int skip_init_info(int count)
+int skip_init_info(int count) {
     log_info("Test skipped while initialization\n");
     log_info("SKIPPED %d of %d tests.\n", count, count);
     return EXIT_SUCCESS;
-int fail_init_info(int count)
+int fail_init_info(int count) {
     log_info("Test failed while initialization\n");
     log_info("FAILED %d of %d tests.\n", count, count);
     return EXIT_FAILURE;
-void version_expected_info(const char *test_name, const char *api_name,
-                           const char *expected_version,
-                           const char *device_version)
-    log_info("%s skipped (requires at least %s version %s, but the device "
-             "reports %s version %s)\n",
-             test_name, api_name, expected_version, api_name, device_version);
+void version_expected_info(const char * test_name, const char * expected_version, const char * device_version) {
+    log_info("%s skipped (requires at least version %s, but the device reports version %s)\n",
+        test_name, expected_version, device_version);
-int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
-                            test_definition testList[],
-                            int forceNoContextCreation,
-                            cl_command_queue_properties queueProps,
-                            DeviceCheckFn deviceCheckFn)
+int runTestHarnessWithCheck( int argc, const char *argv[], int testNum, test_definition testList[],
+                             int forceNoContextCreation, cl_command_queue_properties queueProps,
+                             DeviceCheckFn deviceCheckFn )
-    cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT;
-    cl_uint num_platforms = 0;
-    cl_platform_id *platforms;
-    cl_device_id device;
-    int num_elements = DEFAULT_NUM_ELEMENTS;
-    cl_uint num_devices = 0;
-    cl_device_id *devices = NULL;
-    cl_uint choosen_device_index = 0;
-    cl_uint choosen_platform_index = 0;
+    cl_device_type     device_type = CL_DEVICE_TYPE_DEFAULT;
+    cl_uint            num_platforms = 0;
+    cl_platform_id     *platforms;
+    cl_device_id       device;
+    int                num_elements = DEFAULT_NUM_ELEMENTS;
+    cl_uint            num_devices = 0;
+    cl_device_id       *devices = NULL;
+    cl_uint            choosen_device_index = 0;
+    cl_uint            choosen_platform_index = 0;
-    int err, ret;
+    int            err, ret;
     char *endPtr;
     int based_on_env_var = 0;
     /* Check for environment variable to set device type */
-    char *env_mode = getenv("CL_DEVICE_TYPE");
-    if (env_mode != NULL)
+    char *env_mode = getenv( "CL_DEVICE_TYPE" );
+    if( env_mode != NULL )
         based_on_env_var = 1;
-        if (strcmp(env_mode, "gpu") == 0
-            || strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0)
+        if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 )
             device_type = CL_DEVICE_TYPE_GPU;
-        else if (strcmp(env_mode, "cpu") == 0
-                 || strcmp(env_mode, "CL_DEVICE_TYPE_CPU") == 0)
+        else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 )
             device_type = CL_DEVICE_TYPE_CPU;
-        else if (strcmp(env_mode, "accelerator") == 0
-                 || strcmp(env_mode, "CL_DEVICE_TYPE_ACCELERATOR") == 0)
+        else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
             device_type = CL_DEVICE_TYPE_ACCELERATOR;
-        else if (strcmp(env_mode, "default") == 0
-                 || strcmp(env_mode, "CL_DEVICE_TYPE_DEFAULT") == 0)
+        else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
             device_type = CL_DEVICE_TYPE_DEFAULT;
-            log_error("Unknown CL_DEVICE_TYPE env variable setting: "
-                      "%s.\nAborting...\n",
-                      env_mode);
+            log_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode );
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
         // report on any unusual library search path indirection
-        char *libSearchPath = getenv("DYLD_LIBRARY_PATH");
-        if (libSearchPath)
-            log_info("*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath);
+        char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
+        if( libSearchPath )
+            log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );
         // report on any unusual framework search path indirection
-        char *frameworkSearchPath = getenv("DYLD_FRAMEWORK_PATH");
-        if (libSearchPath)
-            log_info("*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath);
+        char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
+        if( libSearchPath )
+            log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
-    env_mode = getenv("CL_DEVICE_INDEX");
-    if (env_mode != NULL)
+    env_mode = getenv( "CL_DEVICE_INDEX" );
+    if( env_mode != NULL )
         choosen_device_index = atoi(env_mode);
-    env_mode = getenv("CL_PLATFORM_INDEX");
-    if (env_mode != NULL)
+    env_mode = getenv( "CL_PLATFORM_INDEX" );
+    if( env_mode != NULL )
         choosen_platform_index = atoi(env_mode);
@@ -173,43 +158,34 @@
     /* Special case: just list the tests */
-    if ((argc > 1)
-        && (!strcmp(argv[1], "-list") || !strcmp(argv[1], "-h")
-            || !strcmp(argv[1], "--help")))
+    if( ( argc > 1 ) && (!strcmp( argv[ 1 ], "-list" ) || !strcmp( argv[ 1 ], "-h" ) || !strcmp( argv[ 1 ], "--help" )))
         char *fileName = getenv("CL_CONFORMANCE_RESULTS_FILENAME");
-        log_info(
-            "Usage: %s [<test name>*] [pid<num>] [id<num>] [<device type>]\n",
-            argv[0]);
-        log_info("\t<test name>\tOne or more of: (wildcard character '*') "
-                 "(default *)\n");
-        log_info("\tpid<num>\tIndicates platform at index <num> should be used "
-                 "(default 0).\n");
-        log_info("\tid<num>\t\tIndicates device at index <num> should be used "
-                 "(default 0).\n");
-        log_info("\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> "
-                 "(default CL_DEVICE_TYPE_DEFAULT)\n");
-        log_info("\n");
-        log_info("\tNOTE: You may pass environment variable "
-                 "CL_CONFORMANCE_RESULTS_FILENAME (currently '%s')\n",
-                 fileName != NULL ? fileName : "<undefined>");
-        log_info("\t      to save results to JSON file.\n");
+        log_info( "Usage: %s [<test name>*] [pid<num>] [id<num>] [<device type>]\n", argv[0] );
+        log_info( "\t<test name>\tOne or more of: (wildcard character '*') (default *)\n");
+        log_info( "\tpid<num>\tIndicates platform at index <num> should be used (default 0).\n" );
+        log_info( "\tid<num>\t\tIndicates device at index <num> should be used (default 0).\n" );
+        log_info( "\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> (default CL_DEVICE_TYPE_DEFAULT)\n" );
+        log_info( "\n" );
+        log_info( "\tNOTE: You may pass environment variable CL_CONFORMANCE_RESULTS_FILENAME (currently '%s')\n",
+                  fileName != NULL ? fileName : "<undefined>" );
+        log_info( "\t      to save results to JSON file.\n" );
-        log_info("\n");
-        log_info("Test names:\n");
-        for (int i = 0; i < testNum; i++)
+        log_info( "\n" );
+        log_info( "Test names:\n" );
+        for( int i = 0; i < testNum; i++ )
-            log_info("\t%s\n", testList[i].name);
+            log_info( "\t%s\n", testList[i].name );
         return EXIT_SUCCESS;
     /* How are we supposed to seed the random # generators? */
-    if (argc > 1 && strcmp(argv[argc - 1], "randomize") == 0)
+    if( argc > 1 && strcmp( argv[ argc - 1 ], "randomize" ) == 0 )
-        gRandomSeed = (cl_uint)time(NULL);
-        log_info("Random seed: %u.\n", gRandomSeed);
+        gRandomSeed = (cl_uint) time( NULL );
+        log_info( "Random seed: %u.\n", gRandomSeed );
         gReSeed = 1;
@@ -218,44 +194,39 @@
         log_info(" Initializing random seed to 0.\n");
-    /* Do we have an integer to specify the number of elements to pass to tests?
-     */
-    if (argc > 1)
+    /* Do we have an integer to specify the number of elements to pass to tests? */
+    if( argc > 1 )
-        ret = (int)strtol(argv[argc - 1], &endPtr, 10);
-        if (endPtr != argv[argc - 1] && *endPtr == 0)
+        ret = (int)strtol( argv[ argc - 1 ], &endPtr, 10 );
+        if( endPtr != argv[ argc - 1 ] && *endPtr == 0 )
-            /* By spec, this means the entire string was a valid integer, so we
-             * treat it as a num_elements spec */
+            /* By spec, this means the entire string was a valid integer, so we treat it as a num_elements spec */
             /* (hence why we stored the result in ret first) */
             num_elements = ret;
-            log_info("Testing with num_elements of %d\n", num_elements);
+            log_info( "Testing with num_elements of %d\n", num_elements );
     /* Do we have a CPU/GPU specification? */
-    if (argc > 1)
+    if( argc > 1 )
-        if (strcmp(argv[argc - 1], "gpu") == 0
-            || strcmp(argv[argc - 1], "CL_DEVICE_TYPE_GPU") == 0)
+        if( strcmp( argv[ argc - 1 ], "gpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_GPU" ) == 0 )
             device_type = CL_DEVICE_TYPE_GPU;
-        else if (strcmp(argv[argc - 1], "cpu") == 0
-                 || strcmp(argv[argc - 1], "CL_DEVICE_TYPE_CPU") == 0)
+        else if( strcmp( argv[ argc - 1 ], "cpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_CPU" ) == 0 )
             device_type = CL_DEVICE_TYPE_CPU;
-        else if (strcmp(argv[argc - 1], "accelerator") == 0
-                 || strcmp(argv[argc - 1], "CL_DEVICE_TYPE_ACCELERATOR") == 0)
+        else if( strcmp( argv[ argc - 1 ], "accelerator" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
             device_type = CL_DEVICE_TYPE_ACCELERATOR;
-        else if (strcmp(argv[argc - 1], "CL_DEVICE_TYPE_DEFAULT") == 0)
+        else if( strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
             device_type = CL_DEVICE_TYPE_DEFAULT;
@@ -263,74 +234,66 @@
     /* Did we choose a specific device index? */
-    if (argc > 1)
+    if( argc > 1 )
-        if (strlen(argv[argc - 1]) >= 3 && argv[argc - 1][0] == 'i'
-            && argv[argc - 1][1] == 'd')
+        if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'i' && argv[ argc - 1 ][1] == 'd' )
-            choosen_device_index = atoi(&(argv[argc - 1][2]));
+            choosen_device_index = atoi( &(argv[ argc - 1 ][2]) );
     /* Did we choose a specific platform index? */
-    if (argc > 1)
+    if( argc > 1 )
-        if (strlen(argv[argc - 1]) >= 3 && argv[argc - 1][0] == 'p'
-            && argv[argc - 1][1] == 'i' && argv[argc - 1][2] == 'd')
+        if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'p' && argv[ argc - 1 ][1] == 'i' && argv[ argc - 1 ][2] == 'd')
-            choosen_platform_index = atoi(&(argv[argc - 1][3]));
+            choosen_platform_index = atoi( &(argv[ argc - 1 ][3]) );
-    switch (device_type)
-    {
-        case CL_DEVICE_TYPE_GPU: log_info("Requesting GPU device "); break;
-        case CL_DEVICE_TYPE_CPU: log_info("Requesting CPU device "); break;
-            log_info("Requesting Accelerator device ");
-            break;
-            log_info("Requesting Default device ");
-            break;
-        default: log_error("Requesting unknown device "); return EXIT_FAILURE;
-    }
-    log_info(based_on_env_var ? "based on environment variable "
-                              : "based on command line ");
-    log_info("for platform index %d and device index %d\n",
-             choosen_platform_index, choosen_device_index);
-#if defined(__APPLE__)
-#if defined(__i386__) || defined(__x86_64__)
-#define kHasSSE3 0x00000008
-#define kHasSupplementalSSE3 0x00000100
-#define kHasSSE4_1 0x00000400
-#define kHasSSE4_2 0x00000800
+	switch (device_type)
+	{
+	case CL_DEVICE_TYPE_GPU:            log_info("Requesting GPU device "); break;
+	case CL_DEVICE_TYPE_CPU:            log_info("Requesting CPU device "); break;
+	case CL_DEVICE_TYPE_ACCELERATOR:    log_info("Requesting Accelerator device "); break;
+	case CL_DEVICE_TYPE_DEFAULT:        log_info("Requesting Default device "); break;
+	default:                            log_error("Requesting unknown device "); return EXIT_FAILURE;
+	}
+	log_info(based_on_env_var ? "based on environment variable " : "based on command line ");
+	log_info("for platform index %d and device index %d\n", choosen_platform_index, choosen_device_index);
+#if defined( __APPLE__ )
+#if defined( __i386__ ) || defined( __x86_64__ )
+#define    kHasSSE3                0x00000008
+#define kHasSupplementalSSE3    0x00000100
+#define    kHasSSE4_1              0x00000400
+#define    kHasSSE4_2              0x00000800
     /* check our environment for a hint to disable SSE variants */
-        const char *env = getenv("CL_MAX_SSE");
-        if (env)
+        const char *env = getenv( "CL_MAX_SSE" );
+        if( env )
             extern int _cpu_capabilities;
             int mask = 0;
-            if (0 == strcasecmp(env, "SSE4.1"))
+            if( 0 == strcasecmp( env, "SSE4.1" ) )
                 mask = kHasSSE4_2;
-            else if (0 == strcasecmp(env, "SSSE3"))
+            else if( 0 == strcasecmp( env, "SSSE3" ) )
                 mask = kHasSSE4_2 | kHasSSE4_1;
-            else if (0 == strcasecmp(env, "SSE3"))
+            else if( 0 == strcasecmp( env, "SSE3" ) )
                 mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3;
-            else if (0 == strcasecmp(env, "SSE2"))
-                mask =
-                    kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3;
+            else if( 0 == strcasecmp( env, "SSE2" ) )
+                mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3;
-                log_error("Error: Unknown CL_MAX_SSE setting: %s\n", env);
+                log_error( "Error: Unknown CL_MAX_SSE setting: %s\n", env );
                 return EXIT_FAILURE;
-            log_info("*** Environment: CL_MAX_SSE = %s ***\n", env);
+            log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env );
             _cpu_capabilities &= ~mask;
@@ -339,224 +302,204 @@
     /* Get the platform */
     err = clGetPlatformIDs(0, NULL, &num_platforms);
-    if (err)
-    {
+    if (err) {
         print_error(err, "clGetPlatformIDs failed");
         return EXIT_FAILURE;
-    platforms =
-        (cl_platform_id *)malloc(num_platforms * sizeof(cl_platform_id));
-    if (!platforms || choosen_platform_index >= num_platforms)
-    {
-        log_error("platform index out of range -- choosen_platform_index (%d) "
-                  ">= num_platforms (%d)\n",
-                  choosen_platform_index, num_platforms);
+    platforms = (cl_platform_id *) malloc( num_platforms * sizeof( cl_platform_id ) );
+    if (!platforms || choosen_platform_index >= num_platforms) {
+        log_error( "platform index out of range -- choosen_platform_index (%d) >= num_platforms (%d)\n", choosen_platform_index, num_platforms );
         return EXIT_FAILURE;
     BufferOwningPtr<cl_platform_id> platformsBuf(platforms);
     err = clGetPlatformIDs(num_platforms, platforms, NULL);
-    if (err)
-    {
+    if (err) {
         print_error(err, "clGetPlatformIDs failed");
         return EXIT_FAILURE;
     /* Get the number of requested devices */
-    err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, 0,
-                         NULL, &num_devices);
-    if (err)
-    {
+    err = clGetDeviceIDs(platforms[choosen_platform_index],  device_type, 0, NULL, &num_devices );
+    if (err) {
         print_error(err, "clGetDeviceIDs failed");
         return EXIT_FAILURE;
-    devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
-    if (!devices || choosen_device_index >= num_devices)
-    {
-        log_error("device index out of range -- choosen_device_index (%d) >= "
-                  "num_devices (%d)\n",
-                  choosen_device_index, num_devices);
+    devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) );
+    if (!devices || choosen_device_index >= num_devices) {
+        log_error( "device index out of range -- choosen_device_index (%d) >= num_devices (%d)\n", choosen_device_index, num_devices );
         return EXIT_FAILURE;
     BufferOwningPtr<cl_device_id> devicesBuf(devices);
     /* Get the requested device */
-    err = clGetDeviceIDs(platforms[choosen_platform_index], device_type,
-                         num_devices, devices, NULL);
-    if (err)
-    {
+    err = clGetDeviceIDs(platforms[choosen_platform_index],  device_type, num_devices, devices, NULL );
+    if (err) {
         print_error(err, "clGetDeviceIDs failed");
         return EXIT_FAILURE;
     device = devices[choosen_device_index];
-    err = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(gDeviceType),
-                          &gDeviceType, NULL);
-    if (err)
+    err = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof(gDeviceType), &gDeviceType, NULL );
+    if( err )
-        print_error(err, "Unable to get device type");
+        print_error( err, "Unable to get device type" );
         return TEST_FAIL;
-    if (printDeviceHeader(device) != CL_SUCCESS)
+    if( printDeviceHeader( device ) != CL_SUCCESS )
         return EXIT_FAILURE;
     cl_device_fp_config fpconfig = 0;
-    err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(fpconfig),
-                          &fpconfig, NULL);
-    if (err)
-    {
-        print_error(err,
-                    "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed");
+    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( fpconfig ), &fpconfig, NULL );
+    if (err) {
+        print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed");
         return EXIT_FAILURE;
-    gFlushDenormsToZero = (0 == (fpconfig & CL_FP_DENORM));
-    log_info("Supports single precision denormals: %s\n",
-             gFlushDenormsToZero ? "NO" : "YES");
-    log_info("sizeof( void*) = %d  (host)\n", (int)sizeof(void *));
+    gFlushDenormsToZero = ( 0 == (fpconfig & CL_FP_DENORM));
+    log_info( "Supports single precision denormals: %s\n", gFlushDenormsToZero ? "NO" : "YES" );
+    log_info( "sizeof( void*) = %d  (host)\n", (int) sizeof( void* ) );
-    // detect whether profile of the device is embedded
+    //detect whether profile of the device is embedded
     char profile[1024] = "";
-    err = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile,
-                          NULL);
+    err = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
     if (err)
-        print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n");
+        print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
         return EXIT_FAILURE;
     gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE");
-    // detect the floating point capabilities
+    //detect the floating point capabilities
     cl_device_fp_config floatCapabilities = 0;
-    err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
-                          sizeof(floatCapabilities), &floatCapabilities, NULL);
+    err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL);
     if (err)
-        print_error(err,
-                    "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed\n");
+        print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed\n");
         return EXIT_FAILURE;
     // Check for problems that only embedded will have
-    if (gIsEmbedded)
+    if( gIsEmbedded )
-        // If the device is embedded, we need to detect if the device supports
-        // Infinity and NaN
-        if ((floatCapabilities & CL_FP_INF_NAN) == 0) gInfNanSupport = 0;
+        //If the device is embedded, we need to detect if the device supports Infinity and NaN
+        if ((floatCapabilities & CL_FP_INF_NAN) == 0)
+            gInfNanSupport = 0;
         // check the extensions list to see if ulong and long are supported
-        if (!is_extension_available(device, "cles_khr_int64")) gHasLong = 0;
+        if( !is_extension_available(device, "cles_khr_int64" ))
+            gHasLong = 0;
+    }
+    if( getenv( "OPENCL_1_0_DEVICE" ) )
+    {
+        char c_version[1024];
+        gIsOpenCL_1_0_Device = 1;
+        memset( c_version, 0, sizeof( c_version ) );
+        if( (err = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c_version), c_version, NULL )) )
+        {
+            log_error( "FAILURE: unable to get CL_DEVICE_OPENCL_C_VERSION on 1.0 device. (%d)\n", err );
+            return EXIT_FAILURE;
+        }
+        if( 0 == strncmp( c_version, "OpenCL C 1.0 ", strlen( "OpenCL C 1.0 " ) ) )
+        {
+            gIsOpenCL_C_1_0_Device = 1;
+            log_info( "Device is a OpenCL C 1.0 device\n" );
+        }
+        else
+            log_info( "Device is a OpenCL 1.0 device, but supports OpenCL C 1.1\n" );
     cl_uint device_address_bits = 0;
-    if ((err = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS,
-                               sizeof(device_address_bits),
-                               &device_address_bits, NULL)))
+    if( (err = clGetDeviceInfo( device, CL_DEVICE_ADDRESS_BITS, sizeof( device_address_bits ), &device_address_bits, NULL ) ))
-        print_error(err, "Unable to obtain device address bits");
+        print_error( err, "Unable to obtain device address bits" );
         return EXIT_FAILURE;
-    if (device_address_bits)
-        log_info("sizeof( void*) = %d  (device)\n", device_address_bits / 8);
+    if( device_address_bits )
+        log_info( "sizeof( void*) = %d  (device)\n", device_address_bits/8 );
         log_error("Invalid device address bit size returned by device.\n");
         return EXIT_FAILURE;
-    if (gCompilationMode == kSpir_v)
-    {
-        test_status spirv_readiness = check_spirv_compilation_readiness(device);
-        if (spirv_readiness != TEST_PASS)
-        {
-            switch (spirv_readiness)
-            {
-                case TEST_PASS: break;
-                case TEST_FAIL: return fail_init_info(testNum);
-                case TEST_SKIP: return skip_init_info(testNum);
-                case TEST_SKIPPED_ITSELF: return skip_init_info(testNum);
-            }
-        }
-    }
     /* If we have a device checking function, run it */
-    if ((deviceCheckFn != NULL))
+    if( ( deviceCheckFn != NULL ) )
-        test_status status = deviceCheckFn(device);
+        test_status status = deviceCheckFn( device );
         switch (status)
-            case TEST_PASS: break;
-            case TEST_FAIL: return fail_init_info(testNum);
-            case TEST_SKIP: return skip_init_info(testNum);
-            case TEST_SKIPPED_ITSELF: return skip_init_info(testNum);
+            case TEST_PASS:
+                break;
+            case TEST_FAIL:
+                return fail_init_info(testNum);
+            case TEST_SKIP:
+                return skip_init_info(testNum);
-    if (num_elements <= 0) num_elements = DEFAULT_NUM_ELEMENTS;
+    if (num_elements <= 0)
+        num_elements = DEFAULT_NUM_ELEMENTS;
         // On most platforms which support denorm, default is FTZ off. However,
-        // on some hardware where the reference is computed, default might be
-        // flush denorms to zero e.g. arm. This creates issues in result
-        // verification. Since spec allows the implementation to either flush or
-        // not flush denorms to zero, an implementation may choose not be flush
-        // i.e. return denorm result whereas reference result may be zero
-        // (flushed denorm). Hence we need to disable denorm flushing on host
-        // side where reference is being computed to make sure we get
-        // non-flushed reference result. If implementation returns flushed
-        // result, we correctly take care of that in verification code.
+        // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
+        // This creates issues in result verification. Since spec allows the implementation to either flush or
+        // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
+        // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
+        // where reference is being computed to make sure we get non-flushed reference result. If implementation
+        // returns flushed result, we correctly take care of that in verification code.
 #if defined(__APPLE__) && defined(__arm__)
-    FPU_mode_type oldMode;
-    DisableFTZ(&oldMode);
+        FPU_mode_type oldMode;
+        DisableFTZ( &oldMode );
-    int error = parseAndCallCommandLineTests(argc, argv, device, testNum,
-                                             testList, forceNoContextCreation,
-                                             queueProps, num_elements);
+    int error = parseAndCallCommandLineTests( argc, argv, device, testNum, testList, forceNoContextCreation, queueProps, num_elements );
-#if defined(__APPLE__) && defined(__arm__)
-    // Restore the old FP mode before leaving.
-    RestoreFPState(&oldMode);
+ #if defined(__APPLE__) && defined(__arm__)
+     // Restore the old FP mode before leaving.
+    RestoreFPState( &oldMode );
     return (error == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
-static int find_matching_tests(test_definition testList[],
-                               unsigned char selectedTestList[], int testNum,
-                               const char *argument, bool isWildcard)
+static int find_matching_tests( test_definition testList[], unsigned char selectedTestList[], int testNum,
+                                const char *argument, bool isWildcard )
     int found_tests = 0;
-    size_t wildcard_length = strlen(argument) - 1; /* -1 for the asterisk */
+    size_t wildcard_length = strlen( argument ) - 1; /* -1 for the asterisk */
-    for (int i = 0; i < testNum; i++)
+    for( int i = 0; i < testNum; i++ )
-        if ((!isWildcard && strcmp(testList[i].name, argument) == 0)
-            || (isWildcard
-                && strncmp(testList[i].name, argument, wildcard_length) == 0))
+        if( ( !isWildcard && strcmp( testList[i].name, argument ) == 0 ) ||
+            ( isWildcard && strncmp( testList[i].name, argument, wildcard_length ) == 0 ) )
-            if (selectedTestList[i])
+            if( selectedTestList[i] )
-                log_error("ERROR: Test '%s' has already been selected.\n",
-                          testList[i].name);
+                log_error( "ERROR: Test '%s' has already been selected.\n", testList[i].name );
                 return EXIT_FAILURE;
-            else if (testList[i].func == NULL)
+            else if( testList[i].func == NULL )
-                log_error("ERROR: Test '%s' is missing implementation.\n",
-                          testList[i].name);
+                log_error( "ERROR: Test '%s' is missing implementation.\n", testList[i].name );
                 return EXIT_FAILURE;
                 selectedTestList[i] = 1;
                 found_tests = 1;
-                if (!isWildcard)
+                if( !isWildcard )
@@ -564,26 +507,22 @@
-    if (!found_tests)
+    if( !found_tests )
-        log_error("ERROR: The argument '%s' did not match any test names.\n",
-                  argument);
+        log_error( "ERROR: The argument '%s' did not match any test names.\n", argument );
         return EXIT_FAILURE;
     return EXIT_SUCCESS;
-static int saveResultsToJson(const char *fileName, const char *suiteName,
-                             test_definition testList[],
-                             unsigned char selectedTestList[],
-                             test_status resultTestList[], int testNum)
+static int saveResultsToJson( const char *fileName, const char *suiteName, test_definition testList[],
+                              unsigned char selectedTestList[], test_status resultTestList[], int testNum )
-    FILE *file = fopen(fileName, "w");
-    if (NULL == file)
+    FILE *file = fopen( fileName, "w" );
+    if( NULL == file )
-        log_error("ERROR: Failed to open '%s' for writing results.\n",
-                  fileName);
+        log_error( "ERROR: Failed to open '%s' for writing results.\n", fileName );
         return EXIT_FAILURE;
@@ -592,226 +531,185 @@
     const char *linebreak[] = { "", ",\n" };
     int add_linebreak = 0;
-    fprintf(file, "{\n");
-    fprintf(file, "\t\"cmd\": \"%s\",\n", suiteName);
-    fprintf(file, "\t\"results\": {\n");
+    fprintf( file, "{\n" );
+    fprintf( file, "\t\"cmd\": \"%s\",\n", suiteName );
+    fprintf( file, "\t\"results\": {\n" );
-    for (int i = 0; i < testNum; ++i)
+    for( int i = 0; i < testNum; ++i )
-        if (selectedTestList[i])
+        if( selectedTestList[i] )
-            fprintf(file, "%s\t\t\"%s\": \"%s\"", linebreak[add_linebreak],
-                    testList[i].name, result_map[(int)resultTestList[i]]);
+            fprintf( file, "%s\t\t\"%s\": \"%s\"", linebreak[add_linebreak], testList[i].name, result_map[(int)resultTestList[i]] );
             add_linebreak = 1;
-    fprintf(file, "\n");
+    fprintf( file, "\n");
-    fprintf(file, "\t}\n");
-    fprintf(file, "}\n");
+    fprintf( file, "\t}\n" );
+    fprintf( file, "}\n" );
-    int ret = fclose(file) ? 1 : 0;
+    int ret = fclose( file ) ? 1 : 0;
-    log_info("Saving results to %s: %s!\n", fileName, save_map[ret]);
+    log_info( "Saving results to %s: %s!\n", fileName, save_map[ret] );
     return ret;
-static void print_results(int failed, int count, const char *name)
+static void print_results( int failed, int count, const char* name )
-    if (count < failed)
+    if( count < failed )
         count = failed;
-    if (failed == 0)
+    if( failed == 0 )
-        if (count > 1)
+        if( count > 1 )
-            log_info("PASSED %d of %d %ss.\n", count, count, name);
+            log_info( "PASSED %d of %d %ss.\n", count, count, name );
-            log_info("PASSED %s.\n", name);
+            log_info( "PASSED %s.\n", name );
-    else if (failed > 0)
+    else if( failed > 0 )
-        if (count > 1)
+        if( count > 1 )
-            log_error("FAILED %d of %d %ss.\n", failed, count, name);
+            log_error( "FAILED %d of %d %ss.\n", failed, count, name );
-            log_error("FAILED %s.\n", name);
+            log_error( "FAILED %s.\n", name );
-int parseAndCallCommandLineTests(int argc, const char *argv[],
-                                 cl_device_id device, int testNum,
-                                 test_definition testList[],
-                                 int forceNoContextCreation,
-                                 cl_command_queue_properties queueProps,
-                                 int num_elements)
+int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, int testNum,
+                                  test_definition testList[], int forceNoContextCreation,
+                                  cl_command_queue_properties queueProps, int num_elements )
     int ret = EXIT_SUCCESS;
-    unsigned char *selectedTestList = (unsigned char *)calloc(testNum, 1);
+    unsigned char *selectedTestList = ( unsigned char* ) calloc( testNum, 1 );
     test_status *resultTestList = NULL;
-    if (argc == 1)
+    if( argc == 1 )
         /* No actual arguments, all tests will be run. */
-        memset(selectedTestList, 1, testNum);
+        memset( selectedTestList, 1, testNum );
-        for (int i = 1; i < argc; i++)
+        for( int i = 1; i < argc; i++ )
-            if (strchr(argv[i], '*') != NULL)
+            if( strchr( argv[i], '*' ) != NULL )
-                ret = find_matching_tests(testList, selectedTestList, testNum,
-                                          argv[i], true);
+                ret = find_matching_tests( testList, selectedTestList, testNum, argv[i], true );
-                if (strcmp(argv[i], "all") == 0)
+                if( strcmp( argv[i], "all" ) == 0 )
-                    memset(selectedTestList, 1, testNum);
+                    memset( selectedTestList, 1, testNum );
-                    ret = find_matching_tests(testList, selectedTestList,
-                                              testNum, argv[i], false);
+                    ret = find_matching_tests( testList, selectedTestList, testNum, argv[i], false );
-            if (ret == EXIT_FAILURE)
+            if( ret == EXIT_FAILURE )
-    if (ret == EXIT_SUCCESS)
+    if( ret == EXIT_SUCCESS )
-        resultTestList =
-            (test_status *)calloc(testNum, sizeof(*resultTestList));
+        resultTestList = ( test_status* ) calloc( testNum, sizeof(*resultTestList) );
-        callTestFunctions(testList, selectedTestList, resultTestList, testNum,
-                          device, forceNoContextCreation, num_elements,
-                          queueProps);
+        callTestFunctions( testList, selectedTestList, resultTestList, testNum, device,
+                           forceNoContextCreation, num_elements, queueProps );
-        print_results(gFailCount, gTestCount, "sub-test");
-        print_results(gTestsFailed, gTestsFailed + gTestsPassed, "test");
+        print_results( gFailCount, gTestCount, "sub-test" );
+        print_results( gTestsFailed, gTestsFailed + gTestsPassed, "test" );
-        char *filename = getenv("CL_CONFORMANCE_RESULTS_FILENAME");
-        if (filename != NULL)
+        char *filename = getenv( "CL_CONFORMANCE_RESULTS_FILENAME" );
+        if( filename != NULL )
-            ret = saveResultsToJson(filename, argv[0], testList,
-                                    selectedTestList, resultTestList, testNum);
-        }
-        if (std::any_of(resultTestList, resultTestList + testNum,
-                        [](test_status result) {
-                            switch (result)
-                            {
-                                case TEST_PASS:
-                                case TEST_SKIP: return false;
-                                case TEST_FAIL:
-                                default: return true;
-                            };
-                        }))
-        {
-            ret = EXIT_FAILURE;
+            ret = saveResultsToJson( filename, argv[0], testList, selectedTestList, resultTestList, testNum );
-    free(selectedTestList);
-    free(resultTestList);
+    free( selectedTestList );
+    free( resultTestList );
     return ret;
-void callTestFunctions(test_definition testList[],
-                       unsigned char selectedTestList[],
-                       test_status resultTestList[], int testNum,
-                       cl_device_id deviceToUse, int forceNoContextCreation,
-                       int numElementsToUse,
-                       cl_command_queue_properties queueProps)
+void callTestFunctions( test_definition testList[], unsigned char selectedTestList[], test_status resultTestList[],
+                        int testNum, cl_device_id deviceToUse, int forceNoContextCreation, int numElementsToUse,
+                        cl_command_queue_properties queueProps )
-    for (int i = 0; i < testNum; ++i)
+    for( int i = 0; i < testNum; ++i )
-        if (selectedTestList[i])
+        if( selectedTestList[i] )
-            resultTestList[i] = callSingleTestFunction(
-                testList[i], deviceToUse, forceNoContextCreation,
-                numElementsToUse, queueProps);
+            resultTestList[i] = callSingleTestFunction( testList[i], deviceToUse, forceNoContextCreation,
+                                                        numElementsToUse, queueProps );
-void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info,
-                                 size_t cb, void *user_data)
+void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
-    log_info("%s\n", errinfo);
+    log_info( "%s\n", errinfo );
 // Actual function execution
-test_status callSingleTestFunction(test_definition test,
-                                   cl_device_id deviceToUse,
-                                   int forceNoContextCreation,
-                                   int numElementsToUse,
-                                   const cl_queue_properties queueProps)
+test_status callSingleTestFunction( test_definition test, cl_device_id deviceToUse, int forceNoContextCreation,
+                                    int numElementsToUse, const cl_queue_properties queueProps )
     test_status status;
     cl_int error;
     cl_context context = NULL;
     cl_command_queue queue = NULL;
-    log_info("%s...\n",;
-    fflush(stdout);
+    log_info( "%s...\n", );
+    fflush( stdout );
     const Version device_version = get_device_cl_version(deviceToUse);
     if (test.min_version > device_version)
-        version_expected_info(, "OpenCL",
-                              test.min_version.to_string().c_str(),
-                              device_version.to_string().c_str());
+        version_expected_info(, test.min_version.to_string().c_str(), device_version.to_string().c_str());
         return TEST_SKIP;
     /* Create a context to work with, unless we're told not to */
-    if (!forceNoContextCreation)
+    if( !forceNoContextCreation )
-        context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL,
-                                  &error);
+        context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL, &error );
         if (!context)
-            print_error(error, "Unable to create testing context");
+            print_error( error, "Unable to create testing context" );
             return TEST_FAIL;
-        if (device_version < Version(2, 0))
-        {
-            queue =
-                clCreateCommandQueue(context, deviceToUse, queueProps, &error);
-        }
-        else
-        {
-            const cl_command_queue_properties cmd_queueProps =
-                (queueProps) ? CL_QUEUE_PROPERTIES : 0;
-            cl_command_queue_properties queueCreateProps[] = { cmd_queueProps,
-                                                               queueProps, 0 };
-            queue = clCreateCommandQueueWithProperties(
-                context, deviceToUse, &queueCreateProps[0], &error);
+        if (device_version < Version(2, 0)) {
+            queue = clCreateCommandQueue(context, deviceToUse, queueProps, &error);
+        } else {
+            const cl_command_queue_properties cmd_queueProps = (queueProps)?CL_QUEUE_PROPERTIES:0;
+            cl_command_queue_properties queueCreateProps[] = {cmd_queueProps, queueProps, 0};
+            queue = clCreateCommandQueueWithProperties( context, deviceToUse, &queueCreateProps[0], &error );
-        if (queue == NULL)
+        if( queue == NULL )
-            print_error(error, "Unable to create testing command queue");
+            print_error( error, "Unable to create testing command queue" );
             return TEST_FAIL;
@@ -820,35 +718,38 @@
     error = check_functions_for_offline_compiler(, deviceToUse);
-    if (test.func == NULL)
+    if( test.func == NULL )
-        // Skip unimplemented test, can happen when all of the tests are
-        // selected
+        // Skip unimplemented test, can happen when all of the tests are selected
         log_info("%s test currently not implemented\n",;
         status = TEST_SKIP;
-        int ret = test.func(deviceToUse, context, queue, numElementsToUse);
-        if (ret == TEST_SKIPPED_ITSELF)
+        int ret = test.func(deviceToUse, context, queue, numElementsToUse);        //test_threaded_function( ptr_basefn_list[i], group, context, num_elements);
+        if( ret == TEST_NOT_IMPLEMENTED )
-            /* Tests can also let us know they're not supported by the
-             * implementation */
+            /* Tests can also let us know they're not implemented yet */
+            log_info("%s test currently not implemented\n",;
+            status = TEST_SKIP;
+        }
+        else if (ret == TEST_SKIPPED_ITSELF)
+        {
+            /* Tests can also let us know they're not supported by the implementation */
             log_info("%s test not supported\n",;
             status = TEST_SKIP;
             /* Print result */
-            if (ret == 0)
-            {
-                log_info("%s passed\n",;
+            if( ret == 0 ) {
+                log_info( "%s passed\n", );
                 status = TEST_PASS;
-                log_error("%s FAILED\n",;
+                log_error( "%s FAILED\n", );
                 status = TEST_FAIL;
@@ -856,50 +757,50 @@
     /* Release the context */
-    if (!forceNoContextCreation)
+    if( !forceNoContextCreation )
         int error = clFinish(queue);
-        if (error)
-        {
-            log_error("clFinish failed: %s\n", IGetErrorString(error));
+        if (error) {
+            log_error("clFinish failed: %d", error);
             status = TEST_FAIL;
-        clReleaseCommandQueue(queue);
-        clReleaseContext(context);
+        clReleaseCommandQueue( queue );
+        clReleaseContext( context );
     return status;
-#if !defined(__APPLE__)
-void memset_pattern4(void *dest, const void *src_pattern, size_t bytes)
+#if ! defined( __APPLE__ )
+void memset_pattern4(void *dest, const void *src_pattern, size_t bytes )
-    uint32_t pat = ((uint32_t *)src_pattern)[0];
+    uint32_t pat = ((uint32_t*) src_pattern)[0];
     size_t count = bytes / 4;
     size_t i;
-    uint32_t *d = (uint32_t *)dest;
+    uint32_t *d = (uint32_t*)dest;
-    for (i = 0; i < count; i++) d[i] = pat;
+    for( i = 0; i < count; i++ )
+        d[i] = pat;
     d += i;
     bytes &= 3;
-    if (bytes) memcpy(d, src_pattern, bytes);
+    if( bytes )
+        memcpy( d, src_pattern, bytes );
-cl_device_type GetDeviceType(cl_device_id d)
+cl_device_type GetDeviceType( cl_device_id d )
     cl_device_type result = -1;
-    cl_int err =
-        clGetDeviceInfo(d, CL_DEVICE_TYPE, sizeof(result), &result, NULL);
-    if (CL_SUCCESS != err)
-        log_error("ERROR: Unable to get device type for device %p\n", d);
+    cl_int err = clGetDeviceInfo( d, CL_DEVICE_TYPE, sizeof( result ), &result, NULL );
+    if( CL_SUCCESS != err )
+        log_error( "ERROR: Unable to get device type for device %p\n", d );
     return result;
-cl_device_id GetOpposingDevice(cl_device_id device)
+cl_device_id GetOpposingDevice( cl_device_id device )
     cl_int error;
     cl_device_id *otherDevices;
@@ -907,59 +808,53 @@
     cl_platform_id plat;
     // Get the platform of the device to use for getting a list of devices
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(plat), &plat, NULL);
-    if (error != CL_SUCCESS)
+    error = clGetDeviceInfo( device, CL_DEVICE_PLATFORM, sizeof( plat ), &plat, NULL );
+    if( error != CL_SUCCESS )
-        print_error(error, "Unable to get device's platform");
+        print_error( error, "Unable to get device's platform" );
         return NULL;
     // Get a list of all devices
-    error = clGetDeviceIDs(plat, CL_DEVICE_TYPE_ALL, 0, NULL, &actualCount);
-    if (error != CL_SUCCESS)
+    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, 0, NULL, &actualCount );
+    if( error != CL_SUCCESS )
-        print_error(error, "Unable to get list of devices size");
+        print_error( error, "Unable to get list of devices size" );
         return NULL;
-    otherDevices = (cl_device_id *)malloc(actualCount * sizeof(cl_device_id));
-    if (NULL == otherDevices)
-    {
-        print_error(error, "Unable to allocate list of other devices.");
+    otherDevices = (cl_device_id *)malloc(actualCount*sizeof(cl_device_id));
+    if (NULL == otherDevices) {
+        print_error( error, "Unable to allocate list of other devices." );
         return NULL;
     BufferOwningPtr<cl_device_id> otherDevicesBuf(otherDevices);
-    error = clGetDeviceIDs(plat, CL_DEVICE_TYPE_ALL, actualCount, otherDevices,
-                           NULL);
-    if (error != CL_SUCCESS)
+    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, actualCount, otherDevices, NULL );
+    if( error != CL_SUCCESS )
-        print_error(error, "Unable to get list of devices");
+        print_error( error, "Unable to get list of devices" );
         return NULL;
-    if (actualCount == 1)
+    if( actualCount == 1 )
-        return device; // NULL means error, returning self means we couldn't
-                       // find another one
+        return device;    // NULL means error, returning self means we couldn't find another one
     // Loop and just find one that isn't the one we were given
     cl_uint i;
-    for (i = 0; i < actualCount; i++)
+    for( i = 0; i < actualCount; i++ )
-        if (otherDevices[i] != device)
+        if( otherDevices[ i ] != device )
             cl_device_type newType;
-            error = clGetDeviceInfo(otherDevices[i], CL_DEVICE_TYPE,
-                                    sizeof(newType), &newType, NULL);
-            if (error != CL_SUCCESS)
+            error = clGetDeviceInfo( otherDevices[ i ], CL_DEVICE_TYPE, sizeof( newType ), &newType, NULL );
+            if( error != CL_SUCCESS )
-                print_error(error,
-                            "Unable to get device type for other device");
+                print_error( error, "Unable to get device type for other device" );
                 return NULL;
-            cl_device_id result = otherDevices[i];
+            cl_device_id result = otherDevices[ i ];
             return result;
@@ -975,8 +870,7 @@
     ASSERT_SUCCESS(err, "clGetDeviceInfo");
     std::vector<char> str(str_size);
-    err =
-        clGetDeviceInfo(device, CL_DEVICE_VERSION, str_size,, NULL);
+    err = clGetDeviceInfo(device, CL_DEVICE_VERSION, str_size,, NULL);
     ASSERT_SUCCESS(err, "clGetDeviceInfo");
     if (strstr(, "OpenCL 1.0") != NULL)
@@ -994,229 +888,55 @@
     else if (strstr(, "OpenCL 3.0") != NULL)
         return Version(3, 0);
-    throw std::runtime_error(std::string("Unknown OpenCL version: ")
-                             +;
+    throw std::runtime_error(std::string("Unknown OpenCL version: ") +;
-bool check_device_spirv_version_reported(cl_device_id device)
+void PrintArch( void )
-    size_t str_size;
-    cl_int err;
-    std::vector<char> str;
-    if (gCoreILProgram)
-    {
-        err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION, 0, NULL, &str_size);
-        if (err != CL_SUCCESS)
-        {
-            log_error(
-                "clGetDeviceInfo: cannot read CL_DEVICE_IL_VERSION size;");
-            return false;
-        }
-        str.resize(str_size);
-        err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION, str_size,
-                    , NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error(
-                "clGetDeviceInfo: cannot read CL_DEVICE_IL_VERSION value;");
-            return false;
-        }
-    }
-    else
-    {
-        cl_int err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION_KHR, 0, NULL,
-                                     &str_size);
-        if (err != CL_SUCCESS)
-        {
-            log_error(
-                "clGetDeviceInfo: cannot read CL_DEVICE_IL_VERSION_KHR size;");
-            return false;
-        }
-        str.resize(str_size);
-        err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION_KHR, str_size,
-                    , NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error(
-                "clGetDeviceInfo: cannot read CL_DEVICE_IL_VERSION_KHR value;");
-            return false;
-        }
-    }
-    if (strstr(, "SPIR-V") == NULL)
-    {
-        log_info("This device does not support SPIR-V offline compilation.\n");
-        return false;
-    }
-    else
-    {
-        Version spirv_version = get_device_spirv_il_version(device);
-        log_info("This device supports SPIR-V offline compilation. SPIR-V "
-                 "version is %s\n",
-                 spirv_version.to_string().c_str());
-    }
-    return true;
-Version get_device_spirv_il_version(cl_device_id device)
-    size_t str_size;
-    cl_int err;
-    std::vector<char> str;
-    if (gCoreILProgram)
-    {
-        err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION, 0, NULL, &str_size);
-        ASSERT_SUCCESS(err, "clGetDeviceInfo");
-        str.resize(str_size);
-        err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION, str_size,
-                    , NULL);
-        ASSERT_SUCCESS(err, "clGetDeviceInfo");
-    }
-    else
-    {
-        err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION_KHR, 0, NULL,
-                              &str_size);
-        ASSERT_SUCCESS(err, "clGetDeviceInfo");
-        str.resize(str_size);
-        err = clGetDeviceInfo(device, CL_DEVICE_IL_VERSION_KHR, str_size,
-                    , NULL);
-        ASSERT_SUCCESS(err, "clGetDeviceInfo");
-    }
-    if (strstr(, "SPIR-V_1.0") != NULL)
-        return Version(1, 0);
-    else if (strstr(, "SPIR-V_1.1") != NULL)
-        return Version(1, 1);
-    else if (strstr(, "SPIR-V_1.2") != NULL)
-        return Version(1, 2);
-    else if (strstr(, "SPIR-V_1.3") != NULL)
-        return Version(1, 3);
-    else if (strstr(, "SPIR-V_1.4") != NULL)
-        return Version(1, 4);
-    else if (strstr(, "SPIR-V_1.5") != NULL)
-        return Version(1, 5);
-    throw std::runtime_error(std::string("Unknown SPIR-V version: ")
-                             +;
-test_status check_spirv_compilation_readiness(cl_device_id device)
-    auto ocl_version = get_device_cl_version(device);
-    auto ocl_expected_min_version = Version(2, 1);
-    if (ocl_version < ocl_expected_min_version)
-    {
-        if (is_extension_available(device, "cl_khr_il_program"))
-        {
-            gCoreILProgram = false;
-            bool spirv_supported = check_device_spirv_version_reported(device);
-            if (spirv_supported == false)
-            {
-                log_error("SPIR-V intermediate language not supported !!! "
-                          "OpenCL %s requires support.\n",
-                          ocl_version.to_string().c_str());
-                return TEST_FAIL;
-            }
-            else
-            {
-                return TEST_PASS;
-            }
-        }
-        else
-        {
-            log_error("SPIR-V intermediate language support on OpenCL version "
-                      "%s requires cl_khr_il_program extension.\n",
-                      ocl_version.to_string().c_str());
-            return TEST_SKIP;
-        }
-    }
-    bool spirv_supported = check_device_spirv_version_reported(device);
-    if (ocl_version >= ocl_expected_min_version && ocl_version <= Version(2, 2))
-    {
-        if (spirv_supported == false)
-        {
-            log_error("SPIR-V intermediate language not supported !!! OpenCL "
-                      "%s requires support.\n",
-                      ocl_version.to_string().c_str());
-            return TEST_FAIL;
-        }
-    }
-    if (ocl_version > Version(2, 2))
-    {
-        if (spirv_supported == false)
-        {
-            log_info("SPIR-V intermediate language not supported in OpenCL %s. "
-                     "Test skipped.\n",
-                     ocl_version.to_string().c_str());
-            return TEST_SKIP;
-        }
-    }
-    return TEST_PASS;
-cl_platform_id getPlatformFromDevice(cl_device_id deviceID)
-    cl_platform_id platform = nullptr;
-    cl_int err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform),
-                                 &platform, nullptr);
-    ASSERT_SUCCESS(err, "clGetDeviceInfo");
-    return platform;
-void PrintArch(void)
-    vlog("sizeof( void*) = %ld\n", sizeof(void *));
-#if defined(__ppc__)
-    vlog("ARCH:\tppc\n");
-#elif defined(__ppc64__)
-    vlog("ARCH:\tppc64\n");
-#elif defined(__PPC__)
-    vlog("ARCH:\tppc\n");
-#elif defined(__i386__)
-    vlog("ARCH:\ti386\n");
-#elif defined(__x86_64__)
-    vlog("ARCH:\tx86_64\n");
-#elif defined(__arm__)
-    vlog("ARCH:\tarm\n");
+    vlog( "sizeof( void*) = %ld\n", sizeof( void *) );
+#if defined( __ppc__ )
+    vlog( "ARCH:\tppc\n" );
+#elif defined( __ppc64__ )   
+    vlog( "ARCH:\tppc64\n" );
+#elif defined( __PPC__ )   
+    vlog( "ARCH:\tppc\n" );
+#elif defined( __i386__ )   
+    vlog( "ARCH:\ti386\n" );
+#elif defined( __x86_64__ )   
+    vlog( "ARCH:\tx86_64\n" );
+#elif defined( __arm__ )   
+    vlog( "ARCH:\tarm\n" );
 #elif defined(__aarch64__)
-    vlog("ARCH:\taarch64\n");
-#elif defined(_WIN32)
-    vlog("ARCH:\tWindows\n");
+    vlog( "ARCH:\taarch64\n" );
+#elif defined (_WIN32)
+    vlog( "ARCH:\tWindows\n" );
 #error unknown arch
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
     int type = 0;
-    size_t typeSize = sizeof(type);
-    sysctlbyname("hw.cputype", &type, &typeSize, NULL, 0);
-    vlog("cpu type:\t%d\n", type);
-    typeSize = sizeof(type);
-    sysctlbyname("hw.cpusubtype", &type, &typeSize, NULL, 0);
-    vlog("cpu subtype:\t%d\n", type);
+    size_t typeSize = sizeof( type );
+    sysctlbyname( "hw.cputype", &type, &typeSize, NULL, 0 );
+    vlog( "cpu type:\t%d\n", type );
+    typeSize = sizeof( type );
+    sysctlbyname( "hw.cpusubtype", &type, &typeSize, NULL, 0 );
+    vlog( "cpu subtype:\t%d\n", type );
-#elif defined(__linux__)
+#elif defined( __linux__ )
     struct utsname buffer;
-    if (uname(&buffer) != 0)
-    {
-        vlog("uname error");
+    if (uname(&buffer) != 0) {
+       vlog("uname error");
-    else
-    {
-        vlog("system name = %s\n", buffer.sysname);
-        vlog("node name   = %s\n", buffer.nodename);
-        vlog("release     = %s\n", buffer.release);
-        vlog("version     = %s\n", buffer.version);
-        vlog("machine     = %s\n", buffer.machine);
+    else {
+       vlog("system name = %s\n", buffer.sysname);
+       vlog("node name   = %s\n", buffer.nodename);
+       vlog("release     = %s\n", buffer.release);
+       vlog("version     = %s\n", buffer.version);
+       vlog("machine     = %s\n", buffer.machine);
diff --git a/test_common/harness/testHarness.h b/test_common/harness/testHarness.h
index d6054de..7132e77 100644
--- a/test_common/harness/testHarness.h
+++ b/test_common/harness/testHarness.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017-2019 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -23,26 +23,18 @@
 #include <string>
-class Version {
+class Version
-    Version(): m_major(0), m_minor(0) {}
-    Version(int major, int minor): m_major(major), m_minor(minor) {}
-    bool operator>(const Version &rhs) const { return to_int() > rhs.to_int(); }
-    bool operator<(const Version &rhs) const { return to_int() < rhs.to_int(); }
-    bool operator<=(const Version &rhs) const
-    {
-        return to_int() <= rhs.to_int();
-    }
-    bool operator>=(const Version &rhs) const
-    {
-        return to_int() >= rhs.to_int();
-    }
-    bool operator==(const Version &rhs) const
-    {
-        return to_int() == rhs.to_int();
-    }
+    Version() : m_major(0), m_minor(0) {}
+    Version(int major, int minor) : m_major(major), m_minor(minor) {}
+    bool operator>(const Version& rhs) const { return to_int() > rhs.to_int(); }
+    bool operator<(const Version& rhs) const { return to_int() < rhs.to_int(); }
+    bool operator<=(const Version& rhs) const { return to_int() <= rhs.to_int(); }
+    bool operator>=(const Version& rhs) const { return to_int() >= rhs.to_int(); }
+    bool operator==(const Version& rhs) const { return to_int() == rhs.to_int(); }
     int to_int() const { return m_major * 10 + m_minor; }
-    std::string to_string() const
+    std::string to_string() const 
         std::stringstream ss;
         ss << m_major << "." << m_minor;
@@ -64,13 +56,17 @@
     {                                                                          \
         test_##fn, #fn, ver                                                    \
+#define NOT_IMPLEMENTED_TEST(fn)                                               \
+    {                                                                          \
+        NULL, #fn, Version(0, 0)                                               \
+    }
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
 typedef struct test_definition
     basefn func;
-    const char *name;
+    const char* name;
     Version min_version;
 } test_definition;
@@ -80,7 +76,6 @@
     TEST_PASS = 0,
     TEST_FAIL = 1,
     TEST_SKIP = 2,
 } test_status;
 extern int gFailCount;
@@ -88,103 +83,74 @@
 extern cl_uint gReSeed;
 extern cl_uint gRandomSeed;
-// Supply a list of functions to test here. This will allocate a CL device,
-// create a context, all that setup work, and then call each function in turn as
-// dictatated by the passed arguments. Returns EXIT_SUCCESS iff all tests
-// succeeded or the tests were listed, otherwise return EXIT_FAILURE.
-extern int runTestHarness(int argc, const char *argv[], int testNum,
-                          test_definition testList[],
-                          int forceNoContextCreation,
-                          cl_command_queue_properties queueProps);
+// Supply a list of functions to test here. This will allocate a CL device, create a context, all that
+// setup work, and then call each function in turn as dictated by the passed arguments.
+// Returns EXIT_SUCCESS iff all tests succeeded or the tests were listed,
+// otherwise return EXIT_FAILURE.
+extern int runTestHarness( int argc, const char *argv[], int testNum, test_definition testList[],
+                           int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
-// Device checking function. See runTestHarnessWithCheck. If this function
-// returns anything other than TEST_PASS, the harness exits.
-typedef test_status (*DeviceCheckFn)(cl_device_id device);
+// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than TEST_PASS, the harness exits.
+typedef test_status (*DeviceCheckFn)( cl_device_id device );
-// Same as runTestHarness, but also supplies a function that checks the created
-// device for required functionality. Returns EXIT_SUCCESS iff all tests
-// succeeded or the tests were listed, otherwise return EXIT_FAILURE.
-extern int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
-                                   test_definition testList[],
-                                   int forceNoContextCreation,
-                                   cl_command_queue_properties queueProps,
-                                   DeviceCheckFn deviceCheckFn);
+// Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
+// Returns EXIT_SUCCESS iff all tests succeeded or the tests were listed,
+// otherwise return EXIT_FAILURE.
+extern int runTestHarnessWithCheck( int argc, const char *argv[], int testNum, test_definition testList[],
+                                    int forceNoContextCreation, cl_command_queue_properties queueProps,
+                                    DeviceCheckFn deviceCheckFn );
-// The command line parser used by runTestHarness to break up parameters into
-// calls to callTestFunctions
-extern int parseAndCallCommandLineTests(int argc, const char *argv[],
-                                        cl_device_id device, int testNum,
-                                        test_definition testList[],
-                                        int forceNoContextCreation,
-                                        cl_command_queue_properties queueProps,
-                                        int num_elements);
+// The command line parser used by runTestHarness to break up parameters into calls to callTestFunctions
+extern int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, int testNum,
+                                         test_definition testList[], int forceNoContextCreation,
+                                         cl_command_queue_properties queueProps, int num_elements );
-// Call this function if you need to do all the setup work yourself, and just
-// need the function list called/ managed.
+// Call this function if you need to do all the setup work yourself, and just need the function list called/
+// managed.
 //    testList is the data structure that contains test functions and its names
-//    selectedTestList is an array of integers (treated as bools) which tell
-//    which function is to be called,
-//       each element at index i, corresponds to the element in testList at
-//       index i
-//    resultTestList is an array of statuses which contain the result of each
-//    selected test testNum is the number of tests in testList, selectedTestList
-//    and resultTestList contextProps are used to create a testing context for
-//    each test deviceToUse and numElementsToUse are all just passed to each
-//    test function
-extern void callTestFunctions(test_definition testList[],
-                              unsigned char selectedTestList[],
-                              test_status resultTestList[], int testNum,
-                              cl_device_id deviceToUse,
-                              int forceNoContextCreation, int numElementsToUse,
-                              cl_command_queue_properties queueProps);
+//    selectedTestList is an array of integers (treated as bools) which tell which function is to be called,
+//       each element at index i, corresponds to the element in testList at index i
+//    resultTestList is an array of statuses which contain the result of each selected test
+//    testNum is the number of tests in testList, selectedTestList and resultTestList
+//    contextProps are used to create a testing context for each test
+//    deviceToUse and numElementsToUse are all just passed to each test function
+extern void callTestFunctions( test_definition testList[], unsigned char selectedTestList[], test_status resultTestList[],
+                               int testNum, cl_device_id deviceToUse, int forceNoContextCreation, int numElementsToUse,
+                               cl_command_queue_properties queueProps );
-// This function is called by callTestFunctions, once per function, to do setup,
-// call, logging and cleanup
-extern test_status
-callSingleTestFunction(test_definition test, cl_device_id deviceToUse,
-                       int forceNoContextCreation, int numElementsToUse,
-                       cl_command_queue_properties queueProps);
+// This function is called by callTestFunctions, once per function, to do setup, call, logging and cleanup
+extern test_status callSingleTestFunction( test_definition test, cl_device_id deviceToUse, int forceNoContextCreation,
+                                           int numElementsToUse, cl_command_queue_properties queueProps );
 ///// Miscellaneous steps
 // standard callback function for context pfn_notify
-extern void CL_CALLBACK notify_callback(const char *errinfo,
-                                        const void *private_info, size_t cb,
-                                        void *user_data);
+extern void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data);
-extern cl_device_type GetDeviceType(cl_device_id);
+extern cl_device_type GetDeviceType( cl_device_id );
-// Given a device (most likely passed in by the harness, but not required), will
-// attempt to find a DIFFERENT device and return it. Useful for finding another
-// device to run multi-device tests against. Note that returning NULL means an
-// error was hit, but if no error was hit and the device passed in is the only
-// device available, the SAME device is returned, so check!
-extern cl_device_id GetOpposingDevice(cl_device_id device);
+// Given a device (most likely passed in by the harness, but not required), will attempt to find
+// a DIFFERENT device and return it. Useful for finding another device to run multi-device tests against.
+// Note that returning NULL means an error was hit, but if no error was hit and the device passed in
+// is the only device available, the SAME device is returned, so check!
+extern cl_device_id GetOpposingDevice( cl_device_id device );
-Version get_device_spirv_il_version(cl_device_id device);
-bool check_device_spirv_il_support(cl_device_id device);
-void version_expected_info(const char *test_name, const char *api_name,
-                           const char *expected_version,
-                           const char *device_version);
-test_status check_spirv_compilation_readiness(cl_device_id device);
+void version_expected_info(const char * test_name, const char * expected_version, const char * device_version);
-extern int gFlushDenormsToZero; // This is set to 1 if the device does not
-                                // support denorms (CL_FP_DENORM)
-extern int gInfNanSupport; // This is set to 1 if the device supports infinities
-                           // and NaNs
-extern int gIsEmbedded; // This is set to 1 if the device is an embedded device
-extern int gHasLong; // This is set to 1 if the device suppots long and ulong
-                     // types in OpenCL C.
-extern bool gCoreILProgram;
+extern int      gFlushDenormsToZero;    // This is set to 1 if the device does not support denorms (CL_FP_DENORM)
+extern int      gInfNanSupport;         // This is set to 1 if the device supports infinities and NaNs
+extern int        gIsEmbedded;            // This is set to 1 if the device is an embedded device
+extern int        gHasLong;               // This is set to 1 if the device supports long and ulong types in OpenCL C.
+extern int      gIsOpenCL_C_1_0_Device; // This is set to 1 if the device supports only OpenCL C 1.0.
-extern cl_platform_id getPlatformFromDevice(cl_device_id deviceID);
-#if !defined(__APPLE__)
-void memset_pattern4(void *, const void *, size_t);
+#if ! defined( __APPLE__ )
+    void     memset_pattern4(void *, const void *, size_t);
 extern void PrintArch(void);
 #endif // _testHarness_h
diff --git a/test_common/harness/test_mt19937.c b/test_common/harness/test_mt19937.c
index fa57fd3..c0498ea 100644
--- a/test_common/harness/test_mt19937.c
+++ b/test_common/harness/test_mt19937.c
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,26 +16,24 @@
 #include "mt19937.h"
 #include <stdio.h>
-int main(void)
+int main( void )
     MTdata d = init_genrand(42);
     int i;
-    const cl_uint reference[16] = {
-        0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4, 0x55c31f24, 0x8bcd21ab,
-        0x14d5fef5, 0x9416d2b6, 0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404,
-        0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92
-    };
+    const cl_uint reference[16] = { 0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4,
+                                    0x55c31f24, 0x8bcd21ab, 0x14d5fef5, 0x9416d2b6,
+                                    0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404,
+                                    0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92 };
     int errcount = 0;
-    for (i = 0; i < 65536; i++)
+    for( i = 0; i < 65536; i++ )
-        cl_uint u = genrand_int32(d);
-        if (0 == (i & 4095))
+        cl_uint u = genrand_int32( d );
+        if( 0 == (i & 4095) )
-            if (u != reference[i >> 12])
+            if( u != reference[i>>12] )
-                printf("ERROR: expected *0x%8.8x at %d.  Got 0x%8.8x\n",
-                       reference[i >> 12], i, u);
+                printf("ERROR: expected *0x%8.8x at %d.  Got 0x%8.8x\n", reference[i>>12], i, u );
@@ -43,7 +41,7 @@
-    if (errcount)
+    if( errcount )
         printf("mt19937 test failed.\n");
         printf("mt19937 test passed.\n");
diff --git a/test_common/harness/threadTesting.cpp b/test_common/harness/threadTesting.cpp
index 875ee59..1a07f97 100644
--- a/test_common/harness/threadTesting.cpp
+++ b/test_common/harness/threadTesting.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -96,3 +96,5 @@
     return (int)((intptr_t)retVal);
diff --git a/test_common/harness/threadTesting.h b/test_common/harness/threadTesting.h
index 765eabc..71d5797 100644
--- a/test_common/harness/threadTesting.h
+++ b/test_common/harness/threadTesting.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -17,15 +17,17 @@
 #define _threadTesting_h
 #ifdef __APPLE__
-#include <OpenCL/opencl.h>
+    #include <OpenCL/opencl.h>
-#include <CL/opencl.h>
+    #include <CL/opencl.h>
-typedef int (*basefn)(cl_device_id deviceID, cl_context context,
-                      cl_command_queue queue, int num_elements);
-extern int test_threaded_function(basefn fnToTest, cl_device_id device,
-                                  cl_context context, cl_command_queue queue,
-                                  int numElements);
+#define TEST_NOT_IMPLEMENTED        -99
+#define TEST_SKIPPED_ITSELF         -100
+typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
 #endif // _threadTesting_h
diff --git a/test_common/harness/typeWrappers.cpp b/test_common/harness/typeWrappers.cpp
index e6520b1..d4e08fb 100644
--- a/test_common/harness/typeWrappers.cpp
+++ b/test_common/harness/typeWrappers.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -19,310 +19,256 @@
 #include <stdlib.h>
 #include "clImageHelper.h"
-#define ROUND_SIZE_UP(_size, _align)                                           \
-    (((size_t)(_size) + (size_t)(_align)-1) & -((size_t)(_align)))
+#define ROUND_SIZE_UP( _size, _align )      (((size_t)(_size) + (size_t)(_align) - 1) & -((size_t)(_align)))
-#if defined(__APPLE__)
-#define kPageSize 4096
-#include <sys/mman.h>
-#include <stdlib.h>
+#if defined( __APPLE__ )
+    #define kPageSize       4096
+    #include <sys/mman.h>
+    #include <stdlib.h>
 #elif defined(__linux__)
-#include <unistd.h>
-#define kPageSize (getpagesize())
+    #include <unistd.h>
+    #define kPageSize  (getpagesize())
-clProtectedImage::clProtectedImage(cl_context context, cl_mem_flags mem_flags,
-                                   const cl_image_format *fmt, size_t width,
-                                   cl_int *errcode_ret)
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret )
-    cl_int err = Create(context, mem_flags, fmt, width);
-    if (errcode_ret != NULL) *errcode_ret = err;
+    cl_int err = Create( context, mem_flags, fmt, width );
+    if( errcode_ret != NULL )
+        *errcode_ret = err;
-cl_int clProtectedImage::Create(cl_context context, cl_mem_flags mem_flags,
-                                const cl_image_format *fmt, size_t width)
+cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width )
     cl_int error;
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
     int protect_pages = 1;
     cl_device_id devices[16];
     size_t number_of_devices;
-    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices),
-                             devices, &number_of_devices);
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
     test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
     number_of_devices /= sizeof(cl_device_id);
-    for (int i = 0; i < (int)number_of_devices; i++)
-    {
+    for (int i=0; i<(int)number_of_devices; i++) {
         cl_device_type type;
-        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type,
-                                NULL);
+        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
         test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
-        if (type == CL_DEVICE_TYPE_GPU)
-        {
+        if (type == CL_DEVICE_TYPE_GPU) {
             protect_pages = 0;
-    if (protect_pages)
-    {
+    if (protect_pages) {
         size_t pixelBytes = get_pixel_bytes(fmt);
-        size_t rowBytes = ROUND_SIZE_UP(width * pixelBytes, kPageSize);
+        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
         size_t rowStride = rowBytes + kPageSize;
         // create backing store
         backingStoreSize = rowStride + 8 * rowStride;
-        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE,
-                            MAP_ANON | MAP_PRIVATE, 0, 0);
+        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
         // add guard pages
         size_t row;
-        char *p = (char *)backingStore;
-        char *imagePtr = (char *)backingStore + 4 * rowStride;
-        for (row = 0; row < 4; row++)
+        char *p = (char*) backingStore;
+        char *imagePtr = (char*) backingStore + 4 * rowStride;
+        for( row = 0; row < 4; row++ )
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
         p += rowBytes;
-        mprotect(p, kPageSize, PROT_NONE);
-        p += rowStride;
+        mprotect( p, kPageSize, PROT_NONE );        p += rowStride;
         p -= rowBytes;
-        for (row = 0; row < 4; row++)
+        for( row = 0; row < 4; row++ )
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
-        if (getenv("CL_ALIGN_RIGHT"))
+        if(  getenv( "CL_ALIGN_RIGHT" ) )
             static int spewEnv = 1;
-            if (spewEnv)
+            if(spewEnv)
-                log_info("***CL_ALIGN_RIGHT is set. Aligning images at right "
-                         "edge of page\n");
+                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
                 spewEnv = 0;
             imagePtr += rowBytes - pixelBytes * width;
-        image = create_image_1d(context, mem_flags | CL_MEM_USE_HOST_PTR, fmt,
-                                width, rowStride, imagePtr, NULL, &error);
-    }
-    else
-    {
+        image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
+    } else {
         backingStore = NULL;
-        image = create_image_1d(context, mem_flags, fmt, width, 0, NULL, NULL,
-                                &error);
+        image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
     backingStore = NULL;
-    image =
-        create_image_1d(context, mem_flags, fmt, width, 0, NULL, NULL, &error);
+    image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
     return error;
-clProtectedImage::clProtectedImage(cl_context context, cl_mem_flags mem_flags,
-                                   const cl_image_format *fmt, size_t width,
-                                   size_t height, cl_int *errcode_ret)
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret )
-    cl_int err = Create(context, mem_flags, fmt, width, height);
-    if (errcode_ret != NULL) *errcode_ret = err;
+    cl_int err = Create( context, mem_flags, fmt, width, height );
+    if( errcode_ret != NULL )
+        *errcode_ret = err;
-cl_int clProtectedImage::Create(cl_context context, cl_mem_flags mem_flags,
-                                const cl_image_format *fmt, size_t width,
-                                size_t height)
+cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height )
     cl_int error;
-#if defined(__APPLE__)
-    int protect_pages = 1;
-    cl_device_id devices[16];
-    size_t number_of_devices;
-    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices),
-                             devices, &number_of_devices);
-    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
+#if defined( __APPLE__ )
+  int protect_pages = 1;
+  cl_device_id devices[16];
+  size_t number_of_devices;
+  error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
+  test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
-    number_of_devices /= sizeof(cl_device_id);
-    for (int i = 0; i < (int)number_of_devices; i++)
+  number_of_devices /= sizeof(cl_device_id);
+  for (int i=0; i<(int)number_of_devices; i++) {
+    cl_device_type type;
+    error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
+    if (type == CL_DEVICE_TYPE_GPU) {
+      protect_pages = 0;
+      break;
+    }
+  }
+  if (protect_pages) {
+    size_t pixelBytes = get_pixel_bytes(fmt);
+    size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
+    size_t rowStride = rowBytes + kPageSize;
+    // create backing store
+    backingStoreSize = height * rowStride + 8 * rowStride;
+    backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+    // add guard pages
+    size_t row;
+    char *p = (char*) backingStore;
+    char *imagePtr = (char*) backingStore + 4 * rowStride;
+    for( row = 0; row < 4; row++ )
-        cl_device_type type;
-        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type,
-                                NULL);
-        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
-        if (type == CL_DEVICE_TYPE_GPU)
-        {
-            protect_pages = 0;
-            break;
-        }
+        mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+    }
+    p += rowBytes;
+    for( row = 0; row < height; row++ )
+    {
+        mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
+    }
+    p -= rowBytes;
+    for( row = 0; row < 4; row++ )
+    {
+        mprotect( p, rowStride, PROT_NONE );    p += rowStride;
-    if (protect_pages)
+    if(  getenv( "CL_ALIGN_RIGHT" ) )
-        size_t pixelBytes = get_pixel_bytes(fmt);
-        size_t rowBytes = ROUND_SIZE_UP(width * pixelBytes, kPageSize);
-        size_t rowStride = rowBytes + kPageSize;
-        // create backing store
-        backingStoreSize = height * rowStride + 8 * rowStride;
-        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE,
-                            MAP_ANON | MAP_PRIVATE, 0, 0);
-        // add guard pages
-        size_t row;
-        char *p = (char *)backingStore;
-        char *imagePtr = (char *)backingStore + 4 * rowStride;
-        for (row = 0; row < 4; row++)
-        {
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
-        }
-        p += rowBytes;
-        for (row = 0; row < height; row++)
-        {
-            mprotect(p, kPageSize, PROT_NONE);
-            p += rowStride;
-        }
-        p -= rowBytes;
-        for (row = 0; row < 4; row++)
-        {
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
-        }
-        if (getenv("CL_ALIGN_RIGHT"))
-        {
-            static int spewEnv = 1;
-            if (spewEnv)
-            {
-                log_info("***CL_ALIGN_RIGHT is set. Aligning images at right "
-                         "edge of page\n");
-                spewEnv = 0;
-            }
-            imagePtr += rowBytes - pixelBytes * width;
-        }
-        image = create_image_2d(context, mem_flags | CL_MEM_USE_HOST_PTR, fmt,
-                                width, height, rowStride, imagePtr, &error);
+      static int spewEnv = 1;
+      if(spewEnv)
+      {
+        log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
+        spewEnv = 0;
+      }
+      imagePtr += rowBytes - pixelBytes * width;
-    else
-    {
-        backingStore = NULL;
-        image = create_image_2d(context, mem_flags, fmt, width, height, 0, NULL,
-                                &error);
-    }
+      image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
+  } else {
+    backingStore = NULL;
+      image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
+  }
-    backingStore = NULL;
-    image = create_image_2d(context, mem_flags, fmt, width, height, 0, NULL,
-                            &error);
+  backingStore = NULL;
+  image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
     return error;
-clProtectedImage::clProtectedImage(cl_context context, cl_mem_flags mem_flags,
-                                   const cl_image_format *fmt, size_t width,
-                                   size_t height, size_t depth,
-                                   cl_int *errcode_ret)
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret )
-    cl_int err = Create(context, mem_flags, fmt, width, height, depth);
-    if (errcode_ret != NULL) *errcode_ret = err;
+    cl_int err = Create( context, mem_flags, fmt, width, height, depth );
+    if( errcode_ret != NULL )
+        *errcode_ret = err;
-cl_int clProtectedImage::Create(cl_context context, cl_mem_flags mem_flags,
-                                const cl_image_format *fmt, size_t width,
-                                size_t height, size_t depth)
+cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth )
     cl_int error;
-#if defined(__APPLE__)
-    int protect_pages = 1;
-    cl_device_id devices[16];
-    size_t number_of_devices;
-    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices),
-                             devices, &number_of_devices);
-    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
+#if defined( __APPLE__ )
+  int protect_pages = 1;
+  cl_device_id devices[16];
+  size_t number_of_devices;
+  error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
+  test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
-    number_of_devices /= sizeof(cl_device_id);
-    for (int i = 0; i < (int)number_of_devices; i++)
+  number_of_devices /= sizeof(cl_device_id);
+  for (int i=0; i<(int)number_of_devices; i++) {
+    cl_device_type type;
+    error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
+    if (type == CL_DEVICE_TYPE_GPU) {
+      protect_pages = 0;
+      break;
+    }
+  }
+  if (protect_pages) {
+    size_t pixelBytes = get_pixel_bytes(fmt);
+    size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
+    size_t rowStride = rowBytes + kPageSize;
+    // create backing store
+    backingStoreSize = height * depth * rowStride + 8 * rowStride;
+    backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+    // add guard pages
+    size_t row;
+    char *p = (char*) backingStore;
+    char *imagePtr = (char*) backingStore + 4 * rowStride;
+    for( row = 0; row < 4; row++ )
-        cl_device_type type;
-        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type,
-                                NULL);
-        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
-        if (type == CL_DEVICE_TYPE_GPU)
-        {
-            protect_pages = 0;
-            break;
-        }
+        mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+    }
+    p += rowBytes;
+    for( row = 0; row < height*depth; row++ )
+    {
+        mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
+    }
+    p -= rowBytes;
+    for( row = 0; row < 4; row++ )
+    {
+        mprotect( p, rowStride, PROT_NONE );    p += rowStride;
-    if (protect_pages)
+    if(  getenv( "CL_ALIGN_RIGHT" ) )
-        size_t pixelBytes = get_pixel_bytes(fmt);
-        size_t rowBytes = ROUND_SIZE_UP(width * pixelBytes, kPageSize);
-        size_t rowStride = rowBytes + kPageSize;
-        // create backing store
-        backingStoreSize = height * depth * rowStride + 8 * rowStride;
-        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE,
-                            MAP_ANON | MAP_PRIVATE, 0, 0);
-        // add guard pages
-        size_t row;
-        char *p = (char *)backingStore;
-        char *imagePtr = (char *)backingStore + 4 * rowStride;
-        for (row = 0; row < 4; row++)
+        static int spewEnv = 1;
+        if(spewEnv)
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
+            log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
+            spewEnv = 0;
-        p += rowBytes;
-        for (row = 0; row < height * depth; row++)
-        {
-            mprotect(p, kPageSize, PROT_NONE);
-            p += rowStride;
-        }
-        p -= rowBytes;
-        for (row = 0; row < 4; row++)
-        {
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
-        }
-        if (getenv("CL_ALIGN_RIGHT"))
-        {
-            static int spewEnv = 1;
-            if (spewEnv)
-            {
-                log_info("***CL_ALIGN_RIGHT is set. Aligning images at right "
-                         "edge of page\n");
-                spewEnv = 0;
-            }
-            imagePtr += rowBytes - pixelBytes * width;
-        }
-        image = create_image_3d(context, mem_flags | CL_MEM_USE_HOST_PTR, fmt,
-                                width, height, depth, rowStride,
-                                height * rowStride, imagePtr, &error);
+        imagePtr += rowBytes - pixelBytes * width;
-    else
-    {
-        backingStore = NULL;
-        image = create_image_3d(context, mem_flags, fmt, width, height, depth,
-                                0, 0, NULL, &error);
-    }
+    image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
+  } else {
+    backingStore = NULL;
+    image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
+  }
     backingStore = NULL;
-    image = create_image_3d(context, mem_flags, fmt, width, height, depth, 0, 0,
-                            NULL, &error);
+    image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
@@ -330,51 +276,37 @@
-clProtectedImage::clProtectedImage(cl_context context,
-                                   cl_mem_object_type imageType,
-                                   cl_mem_flags mem_flags,
-                                   const cl_image_format *fmt, size_t width,
-                                   size_t height, size_t depth,
-                                   size_t arraySize, cl_int *errcode_ret)
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret )
-    cl_int err = Create(context, imageType, mem_flags, fmt, width, height,
-                        depth, arraySize);
-    if (errcode_ret != NULL) *errcode_ret = err;
+    cl_int err = Create( context, imageType, mem_flags, fmt, width, height, depth, arraySize );
+    if( errcode_ret != NULL )
+        *errcode_ret = err;
-cl_int clProtectedImage::Create(cl_context context,
-                                cl_mem_object_type imageType,
-                                cl_mem_flags mem_flags,
-                                const cl_image_format *fmt, size_t width,
-                                size_t height, size_t depth, size_t arraySize)
+cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize )
     cl_int error;
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
     int protect_pages = 1;
     cl_device_id devices[16];
     size_t number_of_devices;
-    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices),
-                             devices, &number_of_devices);
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
     test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
     number_of_devices /= sizeof(cl_device_id);
-    for (int i = 0; i < (int)number_of_devices; i++)
-    {
+    for (int i=0; i<(int)number_of_devices; i++) {
         cl_device_type type;
-        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type,
-                                NULL);
+        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
         test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
-        if (type == CL_DEVICE_TYPE_GPU)
-        {
+        if (type == CL_DEVICE_TYPE_GPU) {
             protect_pages = 0;
-    if (protect_pages)
-    {
+    if (protect_pages) {
         size_t pixelBytes = get_pixel_bytes(fmt);
-        size_t rowBytes = ROUND_SIZE_UP(width * pixelBytes, kPageSize);
+        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
         size_t rowStride = rowBytes + kPageSize;
         // create backing store
@@ -393,44 +325,37 @@
                 backingStoreSize = arraySize * rowStride + 8 * rowStride;
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                backingStoreSize =
-                    height * arraySize * rowStride + 8 * rowStride;
+                backingStoreSize = height * arraySize * rowStride + 8 * rowStride;
-        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE,
-                            MAP_ANON | MAP_PRIVATE, 0, 0);
+        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
         // add guard pages
         size_t row;
-        char *p = (char *)backingStore;
-        char *imagePtr = (char *)backingStore + 4 * rowStride;
-        for (row = 0; row < 4; row++)
+        char *p = (char*) backingStore;
+        char *imagePtr = (char*) backingStore + 4 * rowStride;
+        for( row = 0; row < 4; row++ )
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
         p += rowBytes;
-        size_t sz = (height > 0 ? height : 1) * (depth > 0 ? depth : 1)
-            * (arraySize > 0 ? arraySize : 1);
-        for (row = 0; row < sz; row++)
+        size_t sz = (height > 0 ? height : 1) * (depth > 0 ? depth : 1) * (arraySize > 0 ? arraySize : 1);
+        for( row = 0; row < sz; row++ )
-            mprotect(p, kPageSize, PROT_NONE);
-            p += rowStride;
+            mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
         p -= rowBytes;
-        for (row = 0; row < 4; row++)
+        for( row = 0; row < 4; row++ )
-            mprotect(p, rowStride, PROT_NONE);
-            p += rowStride;
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
-        if (getenv("CL_ALIGN_RIGHT"))
+        if(  getenv( "CL_ALIGN_RIGHT" ) )
             static int spewEnv = 1;
-            if (spewEnv)
+            if(spewEnv)
-                log_info("***CL_ALIGN_RIGHT is set. Aligning images at right "
-                         "edge of page\n");
+                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
                 spewEnv = 0;
             imagePtr += rowBytes - pixelBytes * width;
@@ -439,61 +364,42 @@
         switch (imageType)
             case CL_MEM_OBJECT_IMAGE1D:
-                image = create_image_1d(
-                    context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width,
-                    rowStride, imagePtr, NULL, &error);
+                image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
             case CL_MEM_OBJECT_IMAGE2D:
-                image = create_image_2d(
-                    context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width,
-                    height, rowStride, imagePtr, &error);
+                image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
             case CL_MEM_OBJECT_IMAGE3D:
-                image =
-                    create_image_3d(context, mem_flags | CL_MEM_USE_HOST_PTR,
-                                    fmt, width, height, depth, rowStride,
-                                    height * rowStride, imagePtr, &error);
+                image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                image = create_image_1d_array(
-                    context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width,
-                    arraySize, rowStride, rowStride, imagePtr, &error);
+                image = create_image_1d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, arraySize, rowStride, rowStride, imagePtr, &error );
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                image = create_image_2d_array(
-                    context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width,
-                    height, arraySize, rowStride, height * rowStride, imagePtr,
-                    &error);
+                image = create_image_2d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, arraySize, rowStride, height*rowStride, imagePtr, &error );
-    }
-    else
-    {
+    } else {
         backingStore = NULL;
         switch (imageType)
             case CL_MEM_OBJECT_IMAGE1D:
-                image = create_image_1d(context, mem_flags, fmt, width, 0, NULL,
-                                        NULL, &error);
+                image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
             case CL_MEM_OBJECT_IMAGE2D:
-                image = create_image_2d(context, mem_flags, fmt, width, height,
-                                        0, NULL, &error);
+                image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
             case CL_MEM_OBJECT_IMAGE3D:
-                image = create_image_3d(context, mem_flags, fmt, width, height,
-                                        depth, 0, 0, NULL, &error);
+                image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );;
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                image = create_image_1d_array(context, mem_flags, fmt, width,
-                                              arraySize, 0, 0, NULL, &error);
+                image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                image = create_image_2d_array(context, mem_flags, fmt, width,
-                                              height, arraySize, 0, 0, NULL,
-                                              &error);
+                image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
@@ -501,25 +407,19 @@
     switch (imageType)
         case CL_MEM_OBJECT_IMAGE1D:
-            image = create_image_1d(context, mem_flags, fmt, width, 0, NULL,
-                                    NULL, &error);
+            image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
         case CL_MEM_OBJECT_IMAGE2D:
-            image = create_image_2d(context, mem_flags, fmt, width, height, 0,
-                                    NULL, &error);
+            image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
         case CL_MEM_OBJECT_IMAGE3D:
-            image = create_image_3d(context, mem_flags, fmt, width, height,
-                                    depth, 0, 0, NULL, &error);
+            image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );;
-            image = create_image_1d_array(context, mem_flags, fmt, width,
-                                          arraySize, 0, 0, NULL, &error);
+            image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
-            image =
-                create_image_2d_array(context, mem_flags, fmt, width, height,
-                                      arraySize, 0, 0, NULL, &error);
+            image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
@@ -527,52 +427,55 @@
  * clProtectedArray implementation
-clProtectedArray::clProtectedArray() { mBuffer = mValidBuffer = NULL; }
-clProtectedArray::clProtectedArray(size_t sizeInBytes)
     mBuffer = mValidBuffer = NULL;
-    Allocate(sizeInBytes);
+clProtectedArray::clProtectedArray( size_t sizeInBytes )
+    mBuffer = mValidBuffer = NULL;
+    Allocate( sizeInBytes );
-    if (mBuffer != NULL)
-    {
-#if defined(__APPLE__)
-        int error = munmap(mBuffer, mRealSize);
-        if (error) log_error("WARNING: munmap failed in clProtectedArray.\n");
+    if( mBuffer != NULL ) {
+#if defined( __APPLE__ )
+        int error = munmap( mBuffer, mRealSize );
+      if (error) log_error("WARNING: munmap failed in clProtectedArray.\n");
-        free(mBuffer);
+    free( mBuffer );
-    }
+  }
-void clProtectedArray::Allocate(size_t sizeInBytes)
+void clProtectedArray::Allocate( size_t sizeInBytes )
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
-    // Allocate enough space to: round up our actual allocation to an even
-    // number of pages and allocate two pages on either side
-    mRoundedSize = ROUND_SIZE_UP(sizeInBytes, kPageSize);
+    // Allocate enough space to: round up our actual allocation to an even number of pages
+    // and allocate two pages on either side
+    mRoundedSize = ROUND_SIZE_UP( sizeInBytes, kPageSize );
     mRealSize = mRoundedSize + kPageSize * 2;
-    // Use mmap here to ensure we start on a page boundary, so the mprotect
-    // calls will work OK
-    mBuffer = (char *)mmap(0, mRealSize, PROT_READ | PROT_WRITE,
-                           MAP_ANON | MAP_PRIVATE, 0, 0);
+    // Use mmap here to ensure we start on a page boundary, so the mprotect calls will work OK
+    mBuffer = (char *)mmap(0, mRealSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
     mValidBuffer = mBuffer + kPageSize;
     // Protect guard area from access
-    mprotect(mValidBuffer - kPageSize, kPageSize, PROT_NONE);
-    mprotect(mValidBuffer + mRoundedSize, kPageSize, PROT_NONE);
+    mprotect( mValidBuffer - kPageSize, kPageSize, PROT_NONE );
+    mprotect( mValidBuffer + mRoundedSize, kPageSize, PROT_NONE );
-    mRoundedSize = mRealSize = sizeInBytes;
-    mBuffer = mValidBuffer = (char *)calloc(1, mRealSize);
+  mRoundedSize = mRealSize = sizeInBytes;
+  mBuffer = mValidBuffer = (char *)calloc(1, mRealSize);
diff --git a/test_common/harness/typeWrappers.h b/test_common/harness/typeWrappers.h
index 9a58a9d..384493f 100644
--- a/test_common/harness/typeWrappers.h
+++ b/test_common/harness/typeWrappers.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -31,376 +31,299 @@
 /* cl_context wrapper */
-class clContextWrapper {
-    clContextWrapper() { mContext = NULL; }
-    clContextWrapper(cl_context program) { mContext = program; }
-    ~clContextWrapper()
-    {
-        if (mContext != NULL) clReleaseContext(mContext);
-    }
+class clContextWrapper
+    public:
+        clContextWrapper() { mContext = NULL; }
+        clContextWrapper( cl_context program ) { mContext = program; }
+        ~clContextWrapper() { if( mContext != NULL ) clReleaseContext( mContext ); }
-    clContextWrapper &operator=(const cl_context &rhs)
-    {
-        mContext = rhs;
-        return *this;
-    }
-    operator cl_context() const { return mContext; }
+        clContextWrapper & operator=( const cl_context &rhs ) { mContext = rhs; return *this; }
+        operator cl_context() const { return mContext; }
-    cl_context *operator&() { return &mContext; }
+        cl_context * operator&() { return &mContext; }
-    bool operator==(const cl_context &rhs) { return mContext == rhs; }
+        bool operator==( const cl_context &rhs ) { return mContext == rhs; }
-    cl_context mContext;
+    protected:
+        cl_context mContext;
 /* cl_program wrapper */
-class clProgramWrapper {
-    clProgramWrapper() { mProgram = NULL; }
-    clProgramWrapper(cl_program program) { mProgram = program; }
-    ~clProgramWrapper()
-    {
-        if (mProgram != NULL) clReleaseProgram(mProgram);
-    }
+class clProgramWrapper
+    public:
+        clProgramWrapper() { mProgram = NULL; }
+        clProgramWrapper( cl_program program ) { mProgram = program; }
+        ~clProgramWrapper() { if( mProgram != NULL ) clReleaseProgram( mProgram ); }
-    clProgramWrapper &operator=(const cl_program &rhs)
-    {
-        mProgram = rhs;
-        return *this;
-    }
-    operator cl_program() const { return mProgram; }
+        clProgramWrapper & operator=( const cl_program &rhs ) { mProgram = rhs; return *this; }
+        operator cl_program() const { return mProgram; }
-    cl_program *operator&() { return &mProgram; }
+        cl_program * operator&() { return &mProgram; }
-    bool operator==(const cl_program &rhs) { return mProgram == rhs; }
+        bool operator==( const cl_program &rhs ) { return mProgram == rhs; }
-    cl_program mProgram;
+    protected:
+        cl_program mProgram;
 /* cl_kernel wrapper */
-class clKernelWrapper {
-    clKernelWrapper() { mKernel = NULL; }
-    clKernelWrapper(cl_kernel kernel) { mKernel = kernel; }
-    ~clKernelWrapper()
-    {
-        if (mKernel != NULL) clReleaseKernel(mKernel);
-    }
+class clKernelWrapper
+    public:
+        clKernelWrapper() { mKernel = NULL; }
+        clKernelWrapper( cl_kernel kernel ) { mKernel = kernel; }
+        ~clKernelWrapper() { if( mKernel != NULL ) clReleaseKernel( mKernel ); }
-    clKernelWrapper &operator=(const cl_kernel &rhs)
-    {
-        mKernel = rhs;
-        return *this;
-    }
-    operator cl_kernel() const { return mKernel; }
+        clKernelWrapper & operator=( const cl_kernel &rhs ) { mKernel = rhs; return *this; }
+        operator cl_kernel() const { return mKernel; }
-    cl_kernel *operator&() { return &mKernel; }
+        cl_kernel * operator&() { return &mKernel; }
-    bool operator==(const cl_kernel &rhs) { return mKernel == rhs; }
+        bool operator==( const cl_kernel &rhs ) { return mKernel == rhs; }
-    cl_kernel mKernel;
+    protected:
+        cl_kernel mKernel;
 /* cl_mem (stream) wrapper */
-class clMemWrapper {
-    clMemWrapper() { mMem = NULL; }
-    clMemWrapper(cl_mem mem) { mMem = mem; }
-    ~clMemWrapper()
-    {
-        if (mMem != NULL) clReleaseMemObject(mMem);
-    }
+class clMemWrapper
+    public:
+        clMemWrapper() { mMem = NULL; }
+        clMemWrapper( cl_mem mem ) { mMem = mem; }
+        ~clMemWrapper() { if( mMem != NULL ) clReleaseMemObject( mMem ); }
-    clMemWrapper &operator=(const cl_mem &rhs)
-    {
-        mMem = rhs;
-        return *this;
-    }
-    operator cl_mem() const { return mMem; }
+        clMemWrapper & operator=( const cl_mem &rhs ) { mMem = rhs; return *this; }
+        operator cl_mem() const { return mMem; }
-    cl_mem *operator&() { return &mMem; }
+        cl_mem * operator&() { return &mMem; }
-    bool operator==(const cl_mem &rhs) { return mMem == rhs; }
+        bool operator==( const cl_mem &rhs ) { return mMem == rhs; }
-    cl_mem mMem;
+    protected:
+        cl_mem mMem;
-class clProtectedImage {
-    clProtectedImage()
-    {
-        image = NULL;
-        backingStore = NULL;
-    }
-    clProtectedImage(cl_context context, cl_mem_flags flags,
-                     const cl_image_format *fmt, size_t width,
-                     cl_int *errcode_ret);
-    clProtectedImage(cl_context context, cl_mem_flags flags,
-                     const cl_image_format *fmt, size_t width, size_t height,
-                     cl_int *errcode_ret);
-    clProtectedImage(cl_context context, cl_mem_flags flags,
-                     const cl_image_format *fmt, size_t width, size_t height,
-                     size_t depth, cl_int *errcode_ret);
-    clProtectedImage(cl_context context, cl_mem_object_type imageType,
-                     cl_mem_flags flags, const cl_image_format *fmt,
-                     size_t width, size_t height, size_t depth,
-                     size_t arraySize, cl_int *errcode_ret);
-    ~clProtectedImage()
-    {
-        if (image != NULL) clReleaseMemObject(image);
+class clProtectedImage
+    public:
+        clProtectedImage() { image = NULL; backingStore = NULL; }
+        clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret );
+        clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret );
+        clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret );
+        clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret );
+        ~clProtectedImage()
+        {
+            if( image != NULL )
+                clReleaseMemObject( image );
-#if defined(__APPLE__)
-        if (backingStore) munmap(backingStore, backingStoreSize);
+#if defined( __APPLE__ )
+            if(backingStore)
+                munmap(backingStore, backingStoreSize);
-    }
+        }
-    cl_int Create(cl_context context, cl_mem_flags flags,
-                  const cl_image_format *fmt, size_t width);
-    cl_int Create(cl_context context, cl_mem_flags flags,
-                  const cl_image_format *fmt, size_t width, size_t height);
-    cl_int Create(cl_context context, cl_mem_flags flags,
-                  const cl_image_format *fmt, size_t width, size_t height,
-                  size_t depth);
-    cl_int Create(cl_context context, cl_mem_object_type imageType,
-                  cl_mem_flags flags, const cl_image_format *fmt, size_t width,
-                  size_t height, size_t depth, size_t arraySize);
+        cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width );
+        cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height );
+        cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth );
+        cl_int Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize );
-    clProtectedImage &operator=(const cl_mem &rhs)
-    {
-        image = rhs;
-        backingStore = NULL;
-        return *this;
-    }
-    operator cl_mem() { return image; }
+        clProtectedImage & operator=( const cl_mem &rhs ) { image = rhs; backingStore = NULL; return *this; }
+        operator cl_mem() { return image; }
-    cl_mem *operator&() { return &image; }
+        cl_mem * operator&() { return &image; }
-    bool operator==(const cl_mem &rhs) { return image == rhs; }
+        bool operator==( const cl_mem &rhs ) { return image == rhs; }
-    void *backingStore;
-    size_t backingStoreSize;
-    cl_mem image;
+    protected:
+        void *backingStore;
+        size_t backingStoreSize;
+        cl_mem  image;
 /* cl_command_queue wrapper */
-class clCommandQueueWrapper {
-    clCommandQueueWrapper() { mMem = NULL; }
-    clCommandQueueWrapper(cl_command_queue mem) { mMem = mem; }
-    ~clCommandQueueWrapper()
-    {
-        if (mMem != NULL)
-        {
-            clReleaseCommandQueue(mMem);
-        }
-    }
+class clCommandQueueWrapper
+    public:
+        clCommandQueueWrapper() { mMem = NULL; }
+        clCommandQueueWrapper( cl_command_queue mem ) { mMem = mem; }
+  ~clCommandQueueWrapper() { if( mMem != NULL ) { clReleaseCommandQueue( mMem ); } }
-    clCommandQueueWrapper &operator=(const cl_command_queue &rhs)
-    {
-        mMem = rhs;
-        return *this;
-    }
-    operator cl_command_queue() const { return mMem; }
+        clCommandQueueWrapper & operator=( const cl_command_queue &rhs ) { mMem = rhs; return *this; }
+        operator cl_command_queue() const { return mMem; }
-    cl_command_queue *operator&() { return &mMem; }
+        cl_command_queue * operator&() { return &mMem; }
-    bool operator==(const cl_command_queue &rhs) { return mMem == rhs; }
+        bool operator==( const cl_command_queue &rhs ) { return mMem == rhs; }
-    cl_command_queue mMem;
+    protected:
+        cl_command_queue mMem;
 /* cl_sampler wrapper */
-class clSamplerWrapper {
-    clSamplerWrapper() { mMem = NULL; }
-    clSamplerWrapper(cl_sampler mem) { mMem = mem; }
-    ~clSamplerWrapper()
-    {
-        if (mMem != NULL) clReleaseSampler(mMem);
-    }
+class clSamplerWrapper
+    public:
+        clSamplerWrapper() { mMem = NULL; }
+        clSamplerWrapper( cl_sampler mem ) { mMem = mem; }
+        ~clSamplerWrapper() { if( mMem != NULL ) clReleaseSampler( mMem ); }
-    clSamplerWrapper &operator=(const cl_sampler &rhs)
-    {
-        mMem = rhs;
-        return *this;
-    }
-    operator cl_sampler() const { return mMem; }
+        clSamplerWrapper & operator=( const cl_sampler &rhs ) { mMem = rhs; return *this; }
+        operator cl_sampler() const { return mMem; }
-    cl_sampler *operator&() { return &mMem; }
+        cl_sampler * operator&() { return &mMem; }
-    bool operator==(const cl_sampler &rhs) { return mMem == rhs; }
+        bool operator==( const cl_sampler &rhs ) { return mMem == rhs; }
-    cl_sampler mMem;
+    protected:
+        cl_sampler mMem;
 /* cl_event wrapper */
-class clEventWrapper {
-    clEventWrapper() { mMem = NULL; }
-    clEventWrapper(cl_event mem) { mMem = mem; }
-    ~clEventWrapper()
-    {
-        if (mMem != NULL) clReleaseEvent(mMem);
-    }
+class clEventWrapper
+    public:
+        clEventWrapper() { mMem = NULL; }
+        clEventWrapper( cl_event mem ) { mMem = mem; }
+        ~clEventWrapper() { if( mMem != NULL ) clReleaseEvent( mMem ); }
-    clEventWrapper &operator=(const cl_event &rhs)
-    {
-        mMem = rhs;
-        return *this;
-    }
-    operator cl_event() const { return mMem; }
+        clEventWrapper & operator=( const cl_event &rhs ) { mMem = rhs; return *this; }
+        operator cl_event() const { return mMem; }
-    cl_event *operator&() { return &mMem; }
+        cl_event * operator&() { return &mMem; }
-    bool operator==(const cl_event &rhs) { return mMem == rhs; }
+        bool operator==( const cl_event &rhs ) { return mMem == rhs; }
-    cl_event mMem;
+    protected:
+        cl_event mMem;
 /* Generic protected memory buffer, for verifying access within bounds */
-class clProtectedArray {
-    clProtectedArray();
-    clProtectedArray(size_t sizeInBytes);
-    virtual ~clProtectedArray();
+class clProtectedArray
+    public:
+        clProtectedArray();
+        clProtectedArray( size_t sizeInBytes );
+        virtual ~clProtectedArray();
-    void Allocate(size_t sizeInBytes);
+        void    Allocate( size_t sizeInBytes );
-    operator void *() { return (void *)mValidBuffer; }
-    operator const void *() const { return (const void *)mValidBuffer; }
+        operator void *()        { return (void *)mValidBuffer; }
+        operator const void *() const { return (const void *)mValidBuffer; }
-    char *mBuffer;
-    char *mValidBuffer;
-    size_t mRealSize, mRoundedSize;
+    protected:
+         char *    mBuffer;
+         char * mValidBuffer;
+        size_t    mRealSize, mRoundedSize;
-class RandomSeed {
-    RandomSeed(cl_uint seed)
-    {
-        if (seed) log_info("(seed = %10.10u) ", seed);
-        mtData = init_genrand(seed);
-    }
-    ~RandomSeed()
-    {
-        if (gReSeed) gRandomSeed = genrand_int32(mtData);
-        free_mtdata(mtData);
-    }
+class RandomSeed
+    public:
+        RandomSeed( cl_uint seed  ){ if(seed) log_info( "(seed = %10.10u) ", seed ); mtData = init_genrand(seed); }
+        ~RandomSeed()
+        {
+            if( gReSeed )
+                gRandomSeed = genrand_int32( mtData );
+            free_mtdata(mtData);
+        }
-    operator MTdata() { return mtData; }
+        operator MTdata ()     {return mtData;}
-    MTdata mtData;
+    protected:
+        MTdata mtData;
-template <typename T> class BufferOwningPtr {
-    BufferOwningPtr(BufferOwningPtr const &); // do not implement
-    void operator=(BufferOwningPtr const &); // do not implement
+template <typename T> class BufferOwningPtr
+  BufferOwningPtr(BufferOwningPtr const &); // do not implement
+    void operator=(BufferOwningPtr const &);  // do not implement
     void *ptr;
     void *map;
-    // Bytes allocated total, pointed to by map:
-    size_t mapsize;
-    // Bytes allocated in unprotected pages, pointed to by ptr:
-    size_t allocsize;
-    bool aligned;
-    explicit BufferOwningPtr(void *p = 0)
-        : ptr(p), map(0), mapsize(0), allocsize(0), aligned(false)
-    {}
-    explicit BufferOwningPtr(void *p, void *m, size_t s)
-        : ptr(p), map(m), mapsize(s), allocsize(0), aligned(false)
-    {
-#if !defined(__APPLE__)
-        if (m)
+  size_t mapsize;   // Bytes allocated total, pointed to by map.
+  size_t allocsize; // Bytes allocated in unprotected pages, pointed to by ptr.
+  bool aligned;
+  public:
+  explicit BufferOwningPtr(void *p = 0) : ptr(p), map(0), mapsize(0), allocsize(0), aligned(false) {}
+  explicit BufferOwningPtr(void *p, void *m, size_t s)
+    : ptr(p), map(m), mapsize(s), allocsize(0), aligned(false)
+      {
+#if ! defined( __APPLE__ )
+        if(m)
-            log_error("ERROR: unhandled code path. BufferOwningPtr allocated "
-                      "with mapped buffer!");
+            log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
-    }
-    ~BufferOwningPtr()
-    {
-        if (map)
-        {
-#if defined(__APPLE__)
-            int error = munmap(map, mapsize);
-            if (error)
-                log_error("WARNING: munmap failed in BufferOwningPtr.\n");
+      }
+    ~BufferOwningPtr() {
+      if (map) {
+#if defined( __APPLE__ )
+        int error = munmap(map, mapsize);
+        if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
-        }
-        else
-        {
-            if (aligned)
-            {
-                align_free(ptr);
-            }
-            else
-            {
-                free(ptr);
-            }
-        }
+      } else {
+          if ( aligned )
+          {
+              align_free(ptr);
+          }
+          else
+          {
+            free(ptr);
+          }
+      }
-    void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0,
-               bool aligned_ = false)
-    {
-        if (map)
-        {
-#if defined(__APPLE__)
-            int error = munmap(map, mapsize);
-            if (error)
-                log_error("WARNING: munmap failed in BufferOwningPtr.\n");
+  void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0, bool aligned_ = false) {
+      if (map){
+#if defined( __APPLE__ )
+        int error = munmap(map, mapsize);
+        if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
-            log_error("ERROR: unhandled code path. BufferOwningPtr reset with "
-                      "mapped buffer!");
-            abort();
+        log_error( "ERROR: unhandled code path. BufferOwningPtr reset with mapped buffer!" );
+        abort();
-        }
-        else
+      } else {
+          if ( aligned )
+          {
+              align_free(ptr);
+          }
+          else
+          {
+            free(ptr);
+          }
+      }
+      ptr = p;
+      map = m;
+      mapsize = mapsize_;
+      allocsize =  (ptr != NULL) ? allocsize_ : 0; // Force allocsize to zero if ptr is NULL.
+      aligned = aligned_;
+#if ! defined( __APPLE__ )
+        if(m)
-            if (aligned)
-            {
-                align_free(ptr);
-            }
-            else
-            {
-                free(ptr);
-            }
-        }
-        ptr = p;
-        map = m;
-        mapsize = mapsize_;
-        // Force allocsize to zero if ptr is NULL:
-        allocsize = (ptr != NULL) ? allocsize_ : 0;
-        aligned = aligned_;
-#if !defined(__APPLE__)
-        if (m)
-        {
-            log_error("ERROR: unhandled code path. BufferOwningPtr allocated "
-                      "with mapped buffer!");
+            log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
-    operator T *() { return (T *)ptr; }
+    operator T*() { return (T*)ptr; }
-    size_t getSize() const { return allocsize; };
+      size_t getSize() const { return allocsize; };
 #endif // _typeWrappers_h
diff --git a/test_conformance/CMakeLists.txt b/test_conformance/CMakeLists.txt
index 363ece8..b5125be 100644
--- a/test_conformance/CMakeLists.txt
+++ b/test_conformance/CMakeLists.txt
@@ -21,7 +21,6 @@
 add_subdirectory( device_partition )
 add_subdirectory( events )
-add_subdirectory( extensions )
 add_subdirectory( geometrics )
    add_subdirectory( gl )
@@ -40,7 +39,8 @@
 add_subdirectory( relationals )
 add_subdirectory( select )
 add_subdirectory( thread_dimensions )
-add_subdirectory( vectors )
+add_subdirectory( vec_align )
+add_subdirectory( vec_step )
 add_subdirectory( c11_atomics )
 add_subdirectory( device_execution )
 add_subdirectory( non_uniform_work_group )
@@ -50,15 +50,13 @@
 add_subdirectory( workgroups )
 add_subdirectory( pipes )
 add_subdirectory( device_timer )
+add_subdirectory( clcpp )
 add_subdirectory( spirv_new )
 add_subdirectory( spir )
 file(GLOB CSV_FILES "opencl_conformance_tests_*.csv")
 # Copy .csv files
 foreach(FILE ${CSV_FILES})
diff --git a/test_conformance/SVM/main.cpp b/test_conformance/SVM/main.cpp
index 56fb24f..fe4aa59 100644
--- a/test_conformance/SVM/main.cpp
+++ b/test_conformance/SVM/main.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -213,15 +213,14 @@
       return -1;
     bool extensions_supported = true;
-    for (auto extension : extensions_list)
+    for (auto extension : extensions_list) 
-        if (!is_extension_available(devices[i], extension.c_str()))
-        {
-            log_error("Required extension not found - device id %d - %s\n", i,
-                      extension.c_str());
-            extensions_supported = false;
-            break;
-        }
+      if (!is_extension_available(devices[i], extension.c_str())) 
+      {
+        log_error("Required extension not found - device id %d - %s\n", i, extension.c_str());
+        extensions_supported = false;
+        break;
+      }
     if((caps & required_svm_caps) == required_svm_caps && extensions_supported)
@@ -250,11 +249,10 @@
     test_error(error, "clCreateCommandQueue failed");
-  if (ppCodeString)
+  if(ppCodeString)
-      error =
-          create_single_kernel_helper(*context, program, 0, 1, ppCodeString, 0);
-      test_error(error, "failed to create program");
+    error = create_single_kernel_helper(*context, program, 0, 1, ppCodeString, 0, "-cl-std=CL2.0");
+    test_error( error, "failed to create program" );
   return 0;
@@ -282,12 +280,9 @@
 test_status InitCL(cl_device_id device) {
   auto version = get_device_cl_version(device);
   auto expected_min_version = Version(2, 0);
-  if (version < expected_min_version)
-  {
-      version_expected_info("Test", "OpenCL",
-                            expected_min_version.to_string().c_str(),
-                            version.to_string().c_str());
-      return TEST_SKIP;
+  if (version < expected_min_version) {
+    version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
+    return TEST_SKIP;
   int error;
diff --git a/test_conformance/SVM/test_fine_grain_memory_consistency.cpp b/test_conformance/SVM/test_fine_grain_memory_consistency.cpp
index b28db41..42ea0bd 100644
--- a/test_conformance/SVM/test_fine_grain_memory_consistency.cpp
+++ b/test_conformance/SVM/test_fine_grain_memory_consistency.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,33 +16,27 @@
 #include "common.h"
 static char hash_table_kernel[] =
-    "#if 0\n"
-    "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
-    "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
-    "#endif\n"
-    "typedef struct BinNode {\n"
-    " int value;\n"
-    " atomic_uintptr_t pNext;\n"
-    "} BinNode;\n"
+  "#if 0\n"
+  "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
+  "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"
+  "#endif\n"
+  "typedef struct BinNode {\n"
+  " int value;\n"
+  " atomic_uintptr_t pNext;\n"
+  "} BinNode;\n"
-    "__kernel void build_hash_table(__global uint* input, __global BinNode* "
-    "pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n"
-    "{\n"
-    " __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, "
-    "1u, memory_order_relaxed, memory_scope_all_svm_devices) ];\n"
-    " uint i = get_global_id(0);\n"
-    " uint b = input[i] % numBins;\n"
-    " pNew->value = input[i];\n"
-    " uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), "
-    "memory_order_seq_cst, memory_scope_all_svm_devices);\n"
-    " do\n"
-    " {\n"
-    "   atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, "
-    "memory_scope_all_svm_devices);\n" // always inserting at head of list
-    " } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), "
-    "&next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, "
-    "memory_scope_all_svm_devices));\n"
-    "}\n";
+  "__kernel void build_hash_table(__global uint* input, __global BinNode* pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n"
+  "{\n"
+  " __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, 1, memory_order_relaxed, memory_scope_all_svm_devices) ];\n"
+  " uint i = get_global_id(0);\n"
+  " uint b = input[i] % numBins;\n"
+  " pNew->value = input[i];\n"
+  " uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), memory_order_seq_cst, memory_scope_all_svm_devices);\n"
+  " do\n"
+  " {\n"
+  "   atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, memory_scope_all_svm_devices);\n" // always inserting at head of list
+  " } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), &next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, memory_scope_all_svm_devices));\n"
+  "}\n";
 typedef struct BinNode{
   cl_uint value;
diff --git a/test_conformance/SVM/test_fine_grain_sync_buffers.cpp b/test_conformance/SVM/test_fine_grain_sync_buffers.cpp
index 0b94cbf..4cc3495 100644
--- a/test_conformance/SVM/test_fine_grain_sync_buffers.cpp
+++ b/test_conformance/SVM/test_fine_grain_sync_buffers.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -17,19 +17,15 @@
 const char *find_targets_kernel[] = {
-    "__kernel void find_targets(__global uint* image, uint target, volatile "
-    "__global atomic_uint *numTargetsFound, volatile __global atomic_uint "
-    "*targetLocations)\n"
-    "{\n"
-    " size_t i = get_global_id(0);\n"
-    " uint index;\n"
-    " if(image[i] == target) {\n"
-    "   index = atomic_fetch_add_explicit(numTargetsFound, 1u, "
-    "memory_order_relaxed, memory_scope_device); \n"
-    "   atomic_exchange_explicit(&targetLocations[index], i, "
-    "memory_order_relaxed, memory_scope_all_svm_devices); \n"
-    " }\n"
-    "}\n"
+  "__kernel void find_targets(__global uint* image, uint target, volatile __global atomic_uint *numTargetsFound, volatile __global atomic_uint *targetLocations)\n"
+  "{\n"
+  " size_t i = get_global_id(0);\n"
+  " uint index;\n"
+  " if(image[i] == target) {\n"
+  "   index = atomic_fetch_add_explicit(numTargetsFound, 1, memory_order_relaxed, memory_scope_device); \n"
+  "   atomic_exchange_explicit(&targetLocations[index], i, memory_order_relaxed, memory_scope_all_svm_devices); \n"
+  " }\n"
+  "}\n"
diff --git a/test_conformance/SVM/test_migrate.cpp b/test_conformance/SVM/test_migrate.cpp
index 2a1ce05..3a39eae 100644
--- a/test_conformance/SVM/test_migrate.cpp
+++ b/test_conformance/SVM/test_migrate.cpp
@@ -199,24 +199,6 @@
     error = clFlush(queues[1]);
     test_error(error, "clFlush failed");
-    // Check the event command type for clEnqueueSVMMigrateMem (OpenCL 3.0 and
-    // newer)
-    Version version = get_device_cl_version(deviceID);
-    if (version >= Version(3, 0))
-    {
-        cl_command_type commandType;
-        error = clGetEventInfo(evs[3], CL_EVENT_COMMAND_TYPE,
-                               sizeof(commandType), &commandType, NULL);
-        test_error(error, "clGetEventInfo failed");
-        if (commandType != CL_COMMAND_SVM_MIGRATE_MEM)
-        {
-            log_error("Invalid command type returned for "
-                      "clEnqueueSVMMigrateMem: %X\n",
-                      commandType);
-            return TEST_FAIL;
-        }
-    }
     error = wait_and_release("first batch", evs, 8);
     if (error)
         return -1;
diff --git a/test_conformance/api/CMakeLists.txt b/test_conformance/api/CMakeLists.txt
index d3e6c6a..d330b4b 100644
--- a/test_conformance/api/CMakeLists.txt
+++ b/test_conformance/api/CMakeLists.txt
@@ -2,19 +2,16 @@
-         negative_platform.cpp
-         test_api_consistency.cpp
+         test_queries_compatibility.cpp
-         test_kernel_private_memory_size.cpp
-         test_kernel_attributes.cpp
@@ -22,19 +19,14 @@
+         test_kernel_arg_info_compatibility.cpp
-         test_min_image_formats.cpp
-         test_queue.cpp
-         test_context_destructor_callback.cpp
-         test_mem_object_properties_queries.cpp
-         test_queue_properties_queries.cpp
-         test_pipe_properties_queries.cpp
diff --git a/test_conformance/api/main.cpp b/test_conformance/api/main.cpp
index fa76a40..72cbbea 100644
--- a/test_conformance/api/main.cpp
+++ b/test_conformance/api/main.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -26,133 +26,107 @@
 // FIXME: To use certain functions in harness/imageHelpers.h
-// (for example, generate_random_image_data()), the tests are required to
-// declare the following variables (<rdar://problem/11111245>):
+// (for example, generate_random_image_data()), the tests are required to declare
+// the following variables (<rdar://problem/11111245>):
 test_definition test_list[] = {
-    ADD_TEST(get_platform_info),
-    ADD_TEST_VERSION(get_sampler_info, Version(2, 0)),
-    ADD_TEST(get_sampler_info_compatibility),
-    ADD_TEST_VERSION(get_command_queue_info, Version(2, 0)),
-    ADD_TEST(get_command_queue_info_compatibility),
-    ADD_TEST(get_context_info),
-    ADD_TEST(get_device_info),
-    ADD_TEST(enqueue_task),
-    ADD_TEST(binary_get),
-    ADD_TEST(binary_create),
-    ADD_TEST(kernel_required_group_size),
+    ADD_TEST( get_platform_info ),
+    ADD_TEST_VERSION( get_sampler_info, Version(2, 0) ),
+    ADD_TEST( get_sampler_info_compatibility ),
+    ADD_TEST_VERSION( get_command_queue_info, Version(2, 0) ),
+    ADD_TEST( get_command_queue_info_compatibility ),
+    ADD_TEST( get_context_info ),
+    ADD_TEST( get_device_info ),
+    ADD_TEST( enqueue_task ),
+    ADD_TEST( binary_get ),
+    ADD_TEST( binary_create ),
+    ADD_TEST( kernel_required_group_size ),
-    ADD_TEST(release_kernel_order),
-    ADD_TEST(release_during_execute),
+    ADD_TEST( release_kernel_order ),
+    ADD_TEST( release_during_execute ),
-    ADD_TEST(load_single_kernel),
-    ADD_TEST(load_two_kernels),
-    ADD_TEST(load_two_kernels_in_one),
-    ADD_TEST(load_two_kernels_manually),
-    ADD_TEST(get_program_info_kernel_names),
-    ADD_TEST(get_kernel_arg_info),
-    ADD_TEST(create_kernels_in_program),
-    ADD_TEST(get_kernel_info),
-    ADD_TEST(kernel_private_memory_size),
-    ADD_TEST(execute_kernel_local_sizes),
-    ADD_TEST(set_kernel_arg_by_index),
-    ADD_TEST(set_kernel_arg_constant),
-    ADD_TEST(set_kernel_arg_struct_array),
-    ADD_TEST(kernel_global_constant),
-    ADD_TEST(kernel_attributes),
+    ADD_TEST( load_single_kernel ),
+    ADD_TEST( load_two_kernels ),
+    ADD_TEST( load_two_kernels_in_one ),
+    ADD_TEST( load_two_kernels_manually ),
+    ADD_TEST( get_program_info_kernel_names ),
+    ADD_TEST( get_kernel_arg_info ),
+    ADD_TEST( get_kernel_arg_info_compatibility ),
+    ADD_TEST( create_kernels_in_program ),
+    ADD_TEST( get_kernel_info ),
+    ADD_TEST( execute_kernel_local_sizes ),
+    ADD_TEST( set_kernel_arg_by_index ),
+    ADD_TEST( set_kernel_arg_constant ),
+    ADD_TEST( set_kernel_arg_struct_array ),
+    ADD_TEST( kernel_global_constant ),
-    ADD_TEST(min_max_thread_dimensions),
-    ADD_TEST(min_max_work_items_sizes),
-    ADD_TEST(min_max_work_group_size),
-    ADD_TEST(min_max_read_image_args),
-    ADD_TEST(min_max_write_image_args),
-    ADD_TEST(min_max_mem_alloc_size),
-    ADD_TEST(min_max_image_2d_width),
-    ADD_TEST(min_max_image_2d_height),
-    ADD_TEST(min_max_image_3d_width),
-    ADD_TEST(min_max_image_3d_height),
-    ADD_TEST(min_max_image_3d_depth),
-    ADD_TEST(min_max_image_array_size),
-    ADD_TEST(min_max_image_buffer_size),
-    ADD_TEST(min_max_parameter_size),
-    ADD_TEST(min_max_samplers),
-    ADD_TEST(min_max_constant_buffer_size),
-    ADD_TEST(min_max_constant_args),
-    ADD_TEST(min_max_compute_units),
-    ADD_TEST(min_max_address_bits),
-    ADD_TEST(min_max_single_fp_config),
-    ADD_TEST(min_max_double_fp_config),
-    ADD_TEST(min_max_local_mem_size),
-    ADD_TEST(min_max_kernel_preferred_work_group_size_multiple),
-    ADD_TEST(min_max_execution_capabilities),
-    ADD_TEST(min_max_queue_properties),
-    ADD_TEST(min_max_device_version),
-    ADD_TEST(min_max_language_version),
+    ADD_TEST( min_max_thread_dimensions ),
+    ADD_TEST( min_max_work_items_sizes ),
+    ADD_TEST( min_max_work_group_size ),
+    ADD_TEST( min_max_read_image_args ),
+    ADD_TEST( min_max_write_image_args ),
+    ADD_TEST( min_max_mem_alloc_size ),
+    ADD_TEST( min_max_image_2d_width ),
+    ADD_TEST( min_max_image_2d_height ),
+    ADD_TEST( min_max_image_3d_width ),
+    ADD_TEST( min_max_image_3d_height ),
+    ADD_TEST( min_max_image_3d_depth ),
+    ADD_TEST( min_max_image_array_size ),
+    ADD_TEST( min_max_image_buffer_size ),
+    ADD_TEST( min_max_parameter_size ),
+    ADD_TEST( min_max_samplers ),
+    ADD_TEST( min_max_constant_buffer_size ),
+    ADD_TEST( min_max_constant_args ),
+    ADD_TEST( min_max_compute_units ),
+    ADD_TEST( min_max_address_bits ),
+    ADD_TEST( min_max_single_fp_config ),
+    ADD_TEST( min_max_double_fp_config ),
+    ADD_TEST( min_max_local_mem_size ),
+    ADD_TEST( min_max_kernel_preferred_work_group_size_multiple ),
+    ADD_TEST( min_max_execution_capabilities ),
+    ADD_TEST( min_max_queue_properties ),
+    ADD_TEST( min_max_device_version ),
+    ADD_TEST( min_max_language_version ),
-    ADD_TEST(kernel_arg_changes),
-    ADD_TEST(kernel_arg_multi_setup_random),
+    ADD_TEST( kernel_arg_changes ),
+    ADD_TEST( kernel_arg_multi_setup_random ),
-    ADD_TEST(native_kernel),
+    ADD_TEST( native_kernel ),
-    ADD_TEST(create_context_from_type),
+    ADD_TEST( create_context_from_type ),
-    ADD_TEST(platform_extensions),
-    ADD_TEST(get_platform_ids),
-    ADD_TEST(bool_type),
+    ADD_TEST( platform_extensions ),
+    ADD_TEST( get_platform_ids ),
+    ADD_TEST( bool_type ),
-    ADD_TEST(repeated_setup_cleanup),
+    ADD_TEST( repeated_setup_cleanup ),
-    ADD_TEST(retain_queue_single),
-    ADD_TEST(retain_queue_multiple),
-    ADD_TEST(retain_mem_object_single),
-    ADD_TEST(retain_mem_object_multiple),
-    ADD_TEST(retain_mem_object_set_kernel_arg),
-    ADD_TEST(min_data_type_align_size_alignment),
+    ADD_TEST( retain_queue_single ),
+    ADD_TEST( retain_queue_multiple ),
+    ADD_TEST( retain_mem_object_single ),
+    ADD_TEST( retain_mem_object_multiple ),
+    ADD_TEST( retain_mem_object_set_kernel_arg ),
+    ADD_TEST( min_data_type_align_size_alignment ),
-    ADD_TEST_VERSION(context_destructor_callback, Version(3, 0)),
-    ADD_TEST(mem_object_destructor_callback),
-    ADD_TEST(null_buffer_arg),
-    ADD_TEST(get_buffer_info),
-    ADD_TEST(get_image2d_info),
-    ADD_TEST(get_image3d_info),
-    ADD_TEST(get_image1d_info),
-    ADD_TEST(get_image1d_array_info),
-    ADD_TEST(get_image2d_array_info),
-    ADD_TEST(queue_flush_on_release),
-    ADD_TEST(queue_hint),
-    ADD_TEST(queue_properties),
-    ADD_TEST_VERSION(sub_group_dispatch, Version(2, 1)),
-    ADD_TEST_VERSION(clone_kernel, Version(2, 1)),
-    ADD_TEST_VERSION(zero_sized_enqueue, Version(2, 1)),
-    ADD_TEST_VERSION(buffer_properties_queries, Version(3, 0)),
-    ADD_TEST_VERSION(image_properties_queries, Version(3, 0)),
-    ADD_TEST_VERSION(queue_properties_queries, Version(3, 0)),
-    ADD_TEST_VERSION(pipe_properties_queries, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_svm, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_memory_model, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_device_enqueue, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_pipes, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_progvar, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_non_uniform_work_group, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_read_write_images, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_2d_image_from_buffer, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_depth_images, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_device_and_host_timer, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_il_programs, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_subgroups, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_prog_ctor_dtor, Version(3, 0)),
-    ADD_TEST_VERSION(consistency_3d_image_writes, Version(3, 0)),
-    ADD_TEST(min_image_formats),
-    ADD_TEST(negative_get_platform_info),
-    ADD_TEST(negative_get_platform_ids),
+    ADD_TEST( mem_object_destructor_callback ),
+    ADD_TEST( null_buffer_arg ),
+    ADD_TEST( get_buffer_info ),
+    ADD_TEST( get_image2d_info ),
+    ADD_TEST( get_image3d_info ),
+    ADD_TEST( get_image1d_info ),
+    ADD_TEST( get_image1d_array_info ),
+    ADD_TEST( get_image2d_array_info ),
+    ADD_TEST( queue_hint ),
+    ADD_TEST( queue_properties ),
+    ADD_TEST_VERSION( sub_group_dispatch, Version(2, 1) ),
+    ADD_TEST_VERSION( clone_kernel, Version(2, 1) ),
+    ADD_TEST_VERSION( zero_sized_enqueue, Version(2, 1) ),
-const int test_num = ARRAY_SIZE(test_list);
+const int test_num = ARRAY_SIZE( test_list );
 int main(int argc, const char *argv[])
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
diff --git a/test_conformance/api/negative_platform.cpp b/test_conformance/api/negative_platform.cpp
deleted file mode 100644
index 7d9de5d..0000000
--- a/test_conformance/api/negative_platform.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "testBase.h"
-int test_negative_get_platform_ids(cl_device_id deviceID, cl_context context,
-                                   cl_command_queue queue, int num_elements)
-    cl_platform_id platform;
-    cl_int err = clGetPlatformIDs(0, &platform, nullptr);
-    test_failure_error_ret(
-        err, CL_INVALID_VALUE,
-        "clGetPlatformIDs should return CL_INVALID_VALUE when: \"num_entries "
-        "is equal to zero and platforms is not NULL\"",
-        TEST_FAIL);
-    err = clGetPlatformIDs(1, nullptr, nullptr);
-    test_failure_error_ret(
-        err, CL_INVALID_VALUE,
-        "clGetPlatformIDs should return CL_INVALID_VALUE when: \"both "
-        "num_platforms and platforms are NULL\"",
-        TEST_FAIL);
-    return TEST_PASS;
-int test_negative_get_platform_info(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements)
-    cl_platform_id platform = getPlatformFromDevice(deviceID);
-    cl_int err =
-        clGetPlatformInfo(reinterpret_cast<cl_platform_id>(deviceID),
-                          CL_PLATFORM_VERSION, sizeof(char*), nullptr, nullptr);
-    test_failure_error_ret(
-        err, CL_INVALID_PLATFORM,
-        "clGetPlatformInfo should return CL_INVALID_PLATFORM  when: \"platform "
-        "is not a valid platform\" using a valid object which is NOT a "
-        "platform",
-        TEST_FAIL);
-    constexpr cl_platform_info INVALID_PARAM_VALUE = 0;
-    err = clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
-    test_failure_error_ret(
-        err, CL_INVALID_VALUE,
-        "clGetPlatformInfo should return CL_INVALID_VALUE when: \"param_name "
-        "is not one of the supported values\"",
-        TEST_FAIL);
-    char* version;
-    err =
-        clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, &version, nullptr);
-    test_failure_error_ret(
-        err, CL_INVALID_VALUE,
-        "clGetPlatformInfo should return CL_INVALID_VALUE when: \"size in "
-        "bytes specified by param_value_size is < size of return type and "
-        "param_value is not a NULL value\"",
-        TEST_FAIL);
-    return TEST_PASS;
diff --git a/test_conformance/api/procs.h b/test_conformance/api/procs.h
index 1bcb311..10b3ea3 100644
--- a/test_conformance/api/procs.h
+++ b/test_conformance/api/procs.h
@@ -47,10 +47,6 @@
 extern int        test_release_during_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int        test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_kernel_private_memory_size(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
 extern int        test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int        test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int        test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
@@ -102,14 +98,7 @@
 extern int        test_retain_mem_object_set_kernel_arg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
-extern int test_context_destructor_callback(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements);
-extern int test_mem_object_destructor_callback(cl_device_id deviceID,
-                                               cl_context context,
-                                               cl_command_queue queue,
-                                               int num_elements);
+extern int        test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_null_buffer_arg( cl_device_id device_id, cl_context context, cl_command_queue queue, int num_elements );
 extern int      test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
@@ -119,88 +108,9 @@
 extern int      test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
 extern int      test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
 extern int      test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
+extern int      test_get_kernel_arg_info_compatibility( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
 extern int      test_queue_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_clone_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_zero_sized_enqueue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_queue_properties( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
-extern int test_queue_flush_on_release(cl_device_id deviceID,
-                                       cl_context context,
-                                       cl_command_queue queue,
-                                       int num_elements);
-extern int test_buffer_properties_queries(cl_device_id deviceID,
-                                          cl_context context,
-                                          cl_command_queue queue,
-                                          int num_elements);
-extern int test_image_properties_queries(cl_device_id deviceID,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         int num_elements);
-extern int test_queue_properties_queries(cl_device_id deviceID,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         int num_elements);
-int test_pipe_properties_queries(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements);
-extern int test_consistency_svm(cl_device_id deviceID, cl_context context,
-                                cl_command_queue queue, int num_elements);
-extern int test_consistency_memory_model(cl_device_id deviceID,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         int num_elements);
-extern int test_consistency_device_enqueue(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
-extern int test_consistency_pipes(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements);
-extern int test_consistency_progvar(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements);
-extern int test_consistency_non_uniform_work_group(cl_device_id deviceID,
-                                                   cl_context context,
-                                                   cl_command_queue queue,
-                                                   int num_elements);
-extern int test_consistency_read_write_images(cl_device_id deviceID,
-                                              cl_context context,
-                                              cl_command_queue queue,
-                                              int num_elements);
-extern int test_consistency_2d_image_from_buffer(cl_device_id deviceID,
-                                                 cl_context context,
-                                                 cl_command_queue queue,
-                                                 int num_elements);
-extern int test_consistency_depth_images(cl_device_id deviceID,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         int num_elements);
-extern int test_consistency_device_and_host_timer(cl_device_id deviceID,
-                                                  cl_context context,
-                                                  cl_command_queue queue,
-                                                  int num_elements);
-extern int test_consistency_il_programs(cl_device_id deviceID,
-                                        cl_context context,
-                                        cl_command_queue queue,
-                                        int num_elements);
-extern int test_consistency_subgroups(cl_device_id deviceID, cl_context context,
-                                      cl_command_queue queue, int num_elements);
-extern int test_consistency_prog_ctor_dtor(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
-extern int test_consistency_3d_image_writes(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements);
-extern int test_min_image_formats(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements);
-extern int test_negative_get_platform_info(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
-extern int test_negative_get_platform_ids(cl_device_id deviceID,
-                                          cl_context context,
-                                          cl_command_queue queue,
-                                          int num_elements);
-extern int test_kernel_attributes(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements);
diff --git a/test_conformance/api/test_api_consistency.cpp b/test_conformance/api/test_api_consistency.cpp
deleted file mode 100644
index d6c4bba..0000000
--- a/test_conformance/api/test_api_consistency.cpp
+++ /dev/null
@@ -1,1150 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "testBase.h"
-#include "harness/testHarness.h"
-#include "harness/deviceInfo.h"
-static const char* test_kernel = R"CLC(
-__kernel void test(__global int* dst) {
-    dst[0] = 0;
-int test_consistency_svm(cl_device_id deviceID, cl_context context,
-                         cl_command_queue queue, int num_elements)
-    // clGetDeviceInfo, passing CL_DEVICE_SVM_CAPABILITIES:
-    // May return 0, indicating that device does not support Shared Virtual
-    // Memory.
-    cl_int error;
-    const size_t allocSize = 16;
-    clMemWrapper mem;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    cl_device_svm_capabilities svmCaps = 0;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES,
-                            sizeof(svmCaps), &svmCaps, NULL);
-    test_error(error, "Unable to query CL_DEVICE_SVM_CAPABILITIES");
-    if (svmCaps == 0)
-    {
-        // Test setup:
-        mem =
-            clCreateBuffer(context, CL_MEM_READ_WRITE, allocSize, NULL, &error);
-        test_error(error, "Unable to create test buffer");
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            &test_kernel, "test");
-        test_error(error, "Unable to create test kernel");
-        // clGetMemObjectInfo, passing CL_MEM_USES_SVM_POINTER
-        // Returns CL_FALSE if no devices in the context associated with
-        // memobj support Shared Virtual Memory.
-        cl_bool usesSVMPointer;
-        error =
-            clGetMemObjectInfo(mem, CL_MEM_USES_SVM_POINTER,
-                               sizeof(usesSVMPointer), &usesSVMPointer, NULL);
-        test_error(error, "Unable to query CL_MEM_USES_SVM_POINTER");
-        test_assert_error(usesSVMPointer == CL_FALSE,
-                          "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-                          "CL_MEM_USES_SVM_POINTER did not return CL_FALSE");
-        // Check that the SVM APIs can be called.
-        // Returns NULL if no devices in context support Shared Virtual Memory.
-        void* ptr0 = clSVMAlloc(context, CL_MEM_READ_WRITE, allocSize, 0);
-        void* ptr1 = clSVMAlloc(context, CL_MEM_READ_WRITE, allocSize, 0);
-        test_assert_error(ptr0 == NULL && ptr1 == NULL,
-                          "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-                          "clSVMAlloc returned a non-NULL value");
-        // clEnqueueSVMFree, clEnqueueSVMMemcpy, clEnqueueSVMMemFill,
-        // clEnqueueSVMMap, clEnqueueSVMUnmap, clEnqueueSVMMigrateMem Returns
-        // CL_INVALID_OPERATION if the device associated with command_queue does
-        // not support Shared Virtual Memory.
-        // These calls purposefully pass bogus pointers to the functions to
-        // better test that they are a NOP when SVM is not supported.
-        void* bogus0 = (void*)0xDEADBEEF;
-        void* bogus1 = (void*)0xDEADDEAD;
-        cl_uint pattern = 0xAAAAAAAA;
-        error = clEnqueueSVMMemFill(queue, bogus0, &pattern, sizeof(pattern),
-                                    allocSize, 0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but clEnqueueSVMMemFill did "
-            "not return CL_INVALID_OPERATION");
-        error = clEnqueueSVMMemcpy(queue, CL_TRUE, bogus1, bogus0, allocSize, 0,
-                                   NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-            "clEnqueueSVMMemcpy did not return CL_INVALID_OPERATION");
-        error = clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_READ, bogus1, allocSize,
-                                0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-            "clEnqueueSVMMap did not return CL_INVALID_OPERATION");
-        error = clEnqueueSVMUnmap(queue, bogus1, 0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-            "clEnqueueSVMUnmap did not return CL_INVALID_OPERATION");
-        error = clEnqueueSVMMigrateMem(queue, 1, (const void**)&bogus1, NULL, 0,
-                                       0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-            "clEnqueueSVMMigrateMem did not return CL_INVALID_OPERATION");
-        // If the enqueue calls above did not return errors, a clFinish would be
-        // needed here to ensure the SVM operations are complete before freeing
-        // the SVM pointers.
-        clSVMFree(context, bogus0);
-        error = clEnqueueSVMFree(queue, 1, &bogus0, NULL, NULL, 0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-            "clEnqueueSVMFree did not return CL_INVALID_OPERATION");
-        // If the enqueue calls above did not return errors, a clFinish should
-        // be included here to ensure the enqueued SVM free is complete.
-        // clSetKernelArgSVMPointer, clSetKernelExecInfo
-        // Returns CL_INVALID_OPERATION if no devices in the context associated
-        // with kernel support Shared Virtual Memory.
-        error = clSetKernelArgSVMPointer(kernel, 0, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-            "clSetKernelArgSVMPointer did not return CL_INVALID_OPERATION");
-        error =
-            clSetKernelExecInfo(kernel, CL_KERNEL_EXEC_INFO_SVM_PTRS, 0, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_DEVICE_SVM_CAPABILITIES returned 0 but "
-            "clSetKernelExecInfo did not return CL_INVALID_OPERATION");
-    }
-    return TEST_PASS;
-static int check_atomic_capabilities(cl_device_atomic_capabilities atomicCaps,
-                                     cl_device_atomic_capabilities requiredCaps)
-    if ((atomicCaps & requiredCaps) != requiredCaps)
-    {
-        log_error("Atomic capabilities %llx is missing support for at least "
-                  "one required capability %llx!\n",
-                  atomicCaps, requiredCaps);
-        return TEST_FAIL;
-    }
-    if ((atomicCaps & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) != 0
-        && (atomicCaps & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0)
-    {
-        log_error("Support for CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES requires "
-                  "support for CL_DEVICE_ATOMIC_SCOPE_DEVICE!\n");
-        return TEST_FAIL;
-    }
-    if ((atomicCaps & CL_DEVICE_ATOMIC_SCOPE_DEVICE) != 0
-        && (atomicCaps & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) == 0)
-    {
-        log_error("Support for CL_DEVICE_ATOMIC_SCOPE_DEVICE requires "
-                  "support for CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP!\n");
-        return TEST_FAIL;
-    }
-    if ((atomicCaps & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) != 0
-        && (atomicCaps & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0)
-    {
-        log_error("Support for CL_DEVICE_ATOMIC_ORDER_SEQ_CST requires "
-                  "support for CL_DEVICE_ATOMIC_ORDER_ACQ_REL!\n");
-        return TEST_FAIL;
-    }
-    if ((atomicCaps & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) != 0
-        && (atomicCaps & CL_DEVICE_ATOMIC_ORDER_RELAXED) == 0)
-    {
-        log_error("Support for CL_DEVICE_ATOMIC_ORDER_ACQ_REL requires "
-                  "support for CL_DEVICE_ATOMIC_ORDER_RELAXED!\n");
-        return TEST_FAIL;
-    }
-    return TEST_PASS;
-int test_consistency_memory_model(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements)
-    cl_int error;
-    cl_device_atomic_capabilities atomicCaps = 0;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
-                            sizeof(atomicCaps), &atomicCaps, NULL);
-    test_error(error, "Unable to query CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES");
-    error = check_atomic_capabilities(atomicCaps,
-                                      CL_DEVICE_ATOMIC_ORDER_RELAXED
-                                          | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP);
-    if (error == TEST_FAIL)
-    {
-        log_error("Checks failed for CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES\n");
-        return error;
-    }
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES,
-                            sizeof(atomicCaps), &atomicCaps, NULL);
-    test_error(error, "Unable to query CL_DEVICE_ATOMIC_FENCE_CAPABILITIES");
-    error = check_atomic_capabilities(atomicCaps,
-                                      CL_DEVICE_ATOMIC_ORDER_RELAXED
-                                          | CL_DEVICE_ATOMIC_ORDER_ACQ_REL
-                                          | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP);
-    if (error == TEST_FAIL)
-    {
-        log_error("Checks failed for CL_DEVICE_ATOMIC_FENCE_CAPABILITIES\n");
-        return error;
-    }
-    return TEST_PASS;
-int test_consistency_device_enqueue(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements)
-    // May return 0, indicating that device does not support Device-Side Enqueue
-    // and On-Device Queues.
-    cl_int error;
-    cl_device_device_enqueue_capabilities dseCaps = 0;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES,
-                            sizeof(dseCaps), &dseCaps, NULL);
-    test_error(error, "Unable to query CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES");
-    if (dseCaps == 0)
-    {
-        // clGetDeviceInfo, passing CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES
-        // Returns 0 if device does not support Device-Side Enqueue and
-        // On-Device Queues.
-        cl_command_queue_properties devQueueProps = 0;
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES,
-                                sizeof(devQueueProps), &devQueueProps, NULL);
-        test_error(error,
-                   "Unable to query CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES");
-        test_assert_error(
-            devQueueProps == 0,
-            "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 but "
-            "CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES returned a non-zero value");
-        // clGetDeviceInfo, passing
-        // Returns 0 if device does not support Device-Side Enqueue and
-        // On-Device Queues.
-        cl_uint u = 0;
-        error =
-            clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE,
-                            sizeof(u), &u, NULL);
-        test_error(error,
-                   "Unable to query CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE");
-        test_assert_error(u == 0,
-                          "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 "
-                          "but CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE "
-                          "returned a non-zero value");
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE,
-                                sizeof(u), &u, NULL);
-        test_error(error, "Unable to query CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE");
-        test_assert_error(
-            u == 0,
-            "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 but "
-            "CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE returned a non-zero value");
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_ON_DEVICE_QUEUES,
-                                sizeof(u), &u, NULL);
-        test_error(error, "Unable to query CL_DEVICE_MAX_ON_DEVICE_QUEUES");
-        test_assert_error(
-            u == 0,
-            "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 but "
-            "CL_DEVICE_MAX_ON_DEVICE_QUEUES returned a non-zero value");
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_ON_DEVICE_EVENTS,
-                                sizeof(u), &u, NULL);
-        test_error(error, "Unable to query CL_DEVICE_MAX_ON_DEVICE_EVENTS");
-        test_assert_error(
-            u == 0,
-            "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 but "
-            "CL_DEVICE_MAX_ON_DEVICE_EVENTS returned a non-zero value");
-        // clGetCommandQueueInfo, passing CL_QUEUE_SIZE
-        // Returns CL_INVALID_COMMAND_QUEUE since command_queue cannot be a
-        // valid device command-queue.
-        error =
-            clGetCommandQueueInfo(queue, CL_QUEUE_SIZE, sizeof(u), &u, NULL);
-        test_failure_error(
-            error, CL_INVALID_COMMAND_QUEUE,
-            "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 but "
-            "CL_QUEUE_SIZE did not return CL_INVALID_COMMAND_QUEUE");
-        cl_command_queue q = NULL;
-        error = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE_DEFAULT, sizeof(q),
-                                      &q, NULL);
-        test_error(error, "Unable to query CL_QUEUE_DEVICE_DEFAULT");
-        test_assert_error(
-            q == NULL,
-            "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 but "
-            "CL_QUEUE_DEVICE_DEFAULT returned a non-NULL value");
-        // clSetDefaultDeviceCommandQueue
-        // Returns CL_INVALID_OPERATION if device does not support On-Device
-        // Queues.
-        error = clSetDefaultDeviceCommandQueue(context, deviceID, NULL);
-        test_failure_error(error, CL_INVALID_OPERATION,
-                           "CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES returned 0 "
-                           "but clSetDefaultDeviceCommandQueue did not return "
-                           "CL_INVALID_OPERATION");
-    }
-    else
-    {
-        if ((dseCaps & CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT) == 0)
-        {
-            // clSetDefaultDeviceCommandQueue
-            // Returns CL_INVALID_OPERATION if device does not support a
-            // replaceable default On-Device Queue.
-            error = clSetDefaultDeviceCommandQueue(context, deviceID, NULL);
-            test_failure_error(
-                error, CL_INVALID_OPERATION,
-                "include CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT but "
-                "clSetDefaultDeviceCommandQueue did not return "
-                "CL_INVALID_OPERATION");
-        }
-        // CL_DEVICE_QUEUE_SUPPORTED must also be set.
-        if ((dseCaps & CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT) != 0
-            && (dseCaps & CL_DEVICE_QUEUE_SUPPORTED) == 0)
-        {
-            log_error("DEVICE_QUEUE_REPLACEABLE_DEFAULT is set but "
-                      "DEVICE_QUEUE_SUPPORTED is not set\n");
-            return TEST_FAIL;
-        }
-        // Devices that set CL_DEVICE_QUEUE_SUPPORTED must also return CL_TRUE
-        if ((dseCaps & CL_DEVICE_QUEUE_SUPPORTED) != 0)
-        {
-            cl_bool b;
-            error = clGetDeviceInfo(deviceID,
-                                    CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT,
-                                    sizeof(b), &b, NULL);
-            test_error(
-                error,
-                "Unable to query CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT");
-            test_assert_error(
-                b == CL_TRUE,
-                "DEVICE_QUEUE_SUPPORTED is set but "
-        }
-    }
-    return TEST_PASS;
-int test_consistency_pipes(cl_device_id deviceID, cl_context context,
-                           cl_command_queue queue, int num_elements)
-    // clGetDeviceInfo, passing CL_DEVICE_PIPE_SUPPORT
-    // May return CL_FALSE, indicating that device does not support Pipes.
-    cl_int error;
-    cl_bool pipeSupport = CL_FALSE;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_PIPE_SUPPORT,
-                            sizeof(pipeSupport), &pipeSupport, NULL);
-    test_error(error, "Unable to query CL_DEVICE_PIPE_SUPPORT");
-    if (pipeSupport == CL_FALSE)
-    {
-        // clGetDeviceInfo, passing
-        // Returns 0 if device does not support Pipes.
-        cl_uint u = 0;
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PIPE_ARGS, sizeof(u),
-                                &u, NULL);
-        test_error(error, "Unable to query CL_DEVICE_MAX_PIPE_ARGS");
-        test_assert_error(u == 0,
-                          "CL_DEVICE_PIPE_SUPPORT returned CL_FALSE, but "
-                          "CL_DEVICE_MAX_PIPE_ARGS returned a non-zero value");
-        error =
-            clGetDeviceInfo(deviceID, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS,
-                            sizeof(u), &u, NULL);
-        test_error(error,
-                   "Unable to query CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS");
-        test_assert_error(u == 0,
-                          "CL_DEVICE_PIPE_SUPPORT returned CL_FALSE, but "
-                          "CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS returned "
-                          "a non-zero value");
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_PIPE_MAX_PACKET_SIZE,
-                                sizeof(u), &u, NULL);
-        test_error(error, "Unable to query CL_DEVICE_PIPE_MAX_PACKET_SIZE");
-        test_assert_error(
-            u == 0,
-            "CL_DEVICE_PIPE_SUPPORT returned CL_FALSE, but "
-            "CL_DEVICE_PIPE_MAX_PACKET_SIZE returned a non-zero value");
-        // clCreatePipe
-        // Returns CL_INVALID_OPERATION if no devices in context support Pipes.
-        clMemWrapper mem =
-            clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, 4, 4, NULL, &error);
-        test_failure_error(error, CL_INVALID_OPERATION,
-                           "CL_DEVICE_PIPE_SUPPORT returned CL_FALSE but "
-                           "clCreatePipe did not return CL_INVALID_OPERATION");
-        // clGetPipeInfo
-        // Returns CL_INVALID_MEM_OBJECT since pipe cannot be a valid pipe
-        // object.
-        clMemWrapper not_a_pipe =
-            clCreateBuffer(context, CL_MEM_READ_WRITE, 4, NULL, &error);
-        test_error(error, "Unable to create non-pipe buffer");
-        error =
-            clGetPipeInfo(not_a_pipe, CL_PIPE_PACKET_SIZE, sizeof(u), &u, NULL);
-        test_failure_error(
-            error, CL_INVALID_MEM_OBJECT,
-            "CL_DEVICE_PIPE_SUPPORT returned CL_FALSE but clGetPipeInfo did "
-            "not return CL_INVALID_MEM_OBJECT");
-    }
-    else
-    {
-        // Devices that support pipes must also return CL_TRUE
-        cl_bool b;
-        error =
-            clGetDeviceInfo(deviceID, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT,
-                            sizeof(b), &b, NULL);
-        test_error(error,
-                   "Unable to query CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT");
-        test_assert_error(
-            b == CL_TRUE,
-            "CL_DEVICE_PIPE_SUPPORT returned CL_TRUE but "
-    }
-    return TEST_PASS;
-int test_consistency_progvar(cl_device_id deviceID, cl_context context,
-                             cl_command_queue queue, int num_elements)
-    // clGetDeviceInfo, passing CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE
-    // May return 0, indicating that device does not support Program Scope
-    // Global Variables.
-    cl_int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    size_t maxGlobalVariableSize = 0;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE,
-                            sizeof(maxGlobalVariableSize),
-                            &maxGlobalVariableSize, NULL);
-    test_error(error, "Unable to query CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE");
-    if (maxGlobalVariableSize == 0)
-    {
-        // Test setup:
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            &test_kernel, "test");
-        test_error(error, "Unable to create test kernel");
-        size_t sz = SIZE_MAX;
-        // clGetDeviceInfo, passing
-        // Returns 0 if device does not support Program Scope Global Variables.
-        error = clGetDeviceInfo(deviceID,
-                                CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE,
-                                sizeof(sz), &sz, NULL);
-        test_error(
-            error,
-        test_assert_error(
-            sz == 0,
-            "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE returned 0 but "
-            "non-zero value");
-        // clGetProgramBuildInfo, passing
-        // Returns 0 if device does not support Program Scope Global Variables.
-        error = clGetProgramBuildInfo(
-            sizeof(sz), &sz, NULL);
-        test_error(
-            error,
-            "Unable to query CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE");
-        test_assert_error(sz == 0,
-                          "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE returned 0 "
-                          "but CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE "
-                          "returned a non-zero value");
-    }
-    return TEST_PASS;
-int test_consistency_non_uniform_work_group(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements)
-    // clGetDeviceInfo, passing CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT:
-    // May return CL_FALSE, indicating that device does not support Non-Uniform
-    // Work Groups.
-    cl_int error;
-    const size_t allocSize = 16;
-    clMemWrapper mem;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    cl_bool nonUniformWorkGroupSupport = CL_FALSE;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT,
-                            sizeof(nonUniformWorkGroupSupport),
-                            &nonUniformWorkGroupSupport, NULL);
-    test_error(error,
-               "Unable to query CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT");
-    if (nonUniformWorkGroupSupport == CL_FALSE)
-    {
-        // Test setup:
-        mem =
-            clCreateBuffer(context, CL_MEM_READ_WRITE, allocSize, NULL, &error);
-        test_error(error, "Unable to create test buffer");
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            &test_kernel, "test");
-        test_error(error, "Unable to create test kernel");
-        error = clSetKernelArg(kernel, 0, sizeof(mem), &mem);
-        // clEnqueueNDRangeKernel
-        // Behaves as though Non-Uniform Work Groups were not enabled for
-        // kernel, if the device associated with command_queue does not support
-        // Non-Uniform Work Groups.
-        size_t global_work_size[] = { 3, 3, 3 };
-        size_t local_work_size[] = { 2, 2, 2 };
-        // First, check that a NULL local work size succeeds.
-        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
-                                       NULL, 0, NULL, NULL);
-        test_error(error,
-                   "Unable to enqueue kernel with a NULL local work size");
-        error = clFinish(queue);
-        test_error(error, "Error calling clFinish after NULL local work size");
-        // 1D non-uniform work group:
-        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
-                                       local_work_size, 0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_WORK_GROUP_SIZE,
-            "clEnqueueNDRangeKernel did not return CL_INVALID_WORK_GROUP_SIZE");
-        // 2D non-uniform work group:
-        global_work_size[0] = local_work_size[0];
-        error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global_work_size,
-                                       local_work_size, 0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_WORK_GROUP_SIZE,
-            "clEnqueueNDRangeKernel did not return CL_INVALID_WORK_GROUP_SIZE");
-        // 3D non-uniform work group:
-        global_work_size[1] = local_work_size[1];
-        error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size,
-                                       local_work_size, 0, NULL, NULL);
-        test_failure_error(
-            error, CL_INVALID_WORK_GROUP_SIZE,
-            "clEnqueueNDRangeKernel did not return CL_INVALID_WORK_GROUP_SIZE");
-    }
-    return TEST_PASS;
-int test_consistency_read_write_images(cl_device_id deviceID,
-                                       cl_context context,
-                                       cl_command_queue queue, int num_elements)
-    // clGetDeviceInfo, passing
-    // indicating that device does not support Read-Write Images.
-    cl_int error;
-    cl_uint maxReadWriteImageArgs = 0;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
-                            sizeof(maxReadWriteImageArgs),
-                            &maxReadWriteImageArgs, NULL);
-    test_error(error,
-               "Unable to query "
-    // clGetSupportedImageFormats, passing
-    // Returns an empty set (such as num_image_formats equal to 0), indicating
-    // that no image formats are supported for reading and writing in the same
-    // kernel, if no devices in context support Read-Write Images.
-    cl_uint totalReadWriteImageFormats = 0;
-    const cl_mem_object_type image_types[] = {
-    };
-    for (int i = 0; i < ARRAY_SIZE(image_types); i++)
-    {
-        cl_uint numImageFormats = 0;
-        error = clGetSupportedImageFormats(
-            context, CL_MEM_KERNEL_READ_AND_WRITE, image_types[i], 0, NULL,
-            &numImageFormats);
-        test_error(error,
-                   "Unable to query number of CL_MEM_KERNEL_READ_AND_WRITE "
-                   "image formats");
-        totalReadWriteImageFormats += numImageFormats;
-    }
-    if (maxReadWriteImageArgs == 0)
-    {
-        test_assert_error(
-            totalReadWriteImageFormats == 0,
-            "CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS returned 0 "
-            "but clGetSupportedImageFormats(CL_MEM_KERNEL_READ_AND_WRITE) "
-            "returned a non-empty set");
-    }
-    else
-    {
-        test_assert_error(
-            totalReadWriteImageFormats != 0,
-            "CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS is non-zero "
-            "but clGetSupportedImageFormats(CL_MEM_KERNEL_READ_AND_WRITE) "
-            "returned an empty set");
-    }
-    return TEST_PASS;
-int test_consistency_2d_image_from_buffer(cl_device_id deviceID,
-                                          cl_context context,
-                                          cl_command_queue queue,
-                                          int num_elements)
-    // clGetDeviceInfo, passing CL_DEVICE_IMAGE_PITCH_ALIGNMENT or
-    // May return 0, indicating that device does not support Creating a 2D Image
-    // from a Buffer.
-    cl_int error;
-    const cl_image_format imageFormat = { CL_RGBA, CL_UNORM_INT8 };
-    const size_t imageDim = 2;
-    const size_t elementSize = 4;
-    const size_t bufferSize = imageDim * imageDim * elementSize;
-    clMemWrapper buffer;
-    clMemWrapper image;
-    cl_uint imagePitchAlignment = 0;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_PITCH_ALIGNMENT,
-                            sizeof(imagePitchAlignment), &imagePitchAlignment,
-                            NULL);
-    test_error(error,
-               "Unable to query "
-    cl_uint imageBaseAddressAlignment = 0;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT,
-                            sizeof(imageBaseAddressAlignment),
-                            &imageBaseAddressAlignment, NULL);
-    test_error(error,
-               "Unable to query "
-    bool supports_cl_khr_image2d_from_buffer =
-        is_extension_available(deviceID, "cl_khr_image2d_from_buffer");
-    if (imagePitchAlignment == 0 || imageBaseAddressAlignment == 0)
-    {
-        // This probably means that Creating a 2D Image from a Buffer is not
-        // supported.
-        // Test setup:
-        buffer =
-            clCreateBuffer(context, CL_MEM_READ_ONLY, bufferSize, NULL, &error);
-        test_error(error, "Unable to create test buffer");
-        // Check that both queries return zero:
-        test_assert_error(
-            imagePitchAlignment == 0,
-            "CL_DEVICE_IMAGE_PITCH_ALIGNMENT returned a non-zero value but "
-            "CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT returned 0");
-        test_assert_error(
-            imageBaseAddressAlignment == 0,
-            "CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT returned a non-zero value "
-            "but CL_DEVICE_IMAGE_PITCH_ALIGNMENT returned 0");
-        // clGetDeviceInfo, passing CL_DEVICE_EXTENSIONS
-        // Will not describe support for the cl_khr_image2d_from_buffer
-        // extension if device does not support Creating a 2D Image from a
-        // Buffer.
-        test_assert_error(supports_cl_khr_image2d_from_buffer == false,
-                          "Device does not support Creating a 2D Image from a "
-                          "Buffer but does support cl_khr_image2d_from_buffer");
-        // clCreateImage or clCreateImageWithProperties, passing image_type
-        // equal to CL_MEM_OBJECT_IMAGE2D and mem_object not equal to
-        // NULL
-        // Returns CL_INVALID_OPERATION if no devices in context support
-        // Creating a 2D Image from a Buffer.
-        cl_image_desc imageDesc = { 0 };
-        imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
-        imageDesc.image_width = imageDim;
-        imageDesc.image_height = imageDim;
-        imageDesc.mem_object = buffer;
-        image = clCreateImage(context, CL_MEM_READ_ONLY, &imageFormat,
-                              &imageDesc, NULL, &error);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "Device does not support Creating a 2D Image from a "
-            "Buffer but clCreateImage did not return CL_INVALID_OPERATION");
-        image =
-            clCreateImageWithProperties(context, NULL, CL_MEM_READ_ONLY,
-                                        &imageFormat, &imageDesc, NULL, &error);
-        test_failure_error(error, CL_INVALID_OPERATION,
-                           "Device does not support Creating a 2D Image from a "
-                           "Buffer but clCreateImageWithProperties did not "
-                           "return CL_INVALID_OPERATION");
-    }
-    else
-    {
-        test_assert_error(supports_cl_khr_image2d_from_buffer,
-                          "Device supports Creating a 2D Image from a Buffer "
-                          "but does not support cl_khr_image2d_from_buffer");
-    }
-    return TEST_PASS;
-// Nothing needed for sRGB Images:
-// All of the sRGB Image Channel Orders (such as CL_​sRGBA) are optional for
-// devices supporting OpenCL 3.0.
-int test_consistency_depth_images(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements)
-    // The CL_DEPTH Image Channel Order is optional for devices supporting
-    // OpenCL 3.0.
-    cl_int error;
-    cl_uint totalDepthImageFormats = 0;
-    const cl_mem_flags mem_flags[] = {
-    };
-    for (int i = 0; i < ARRAY_SIZE(mem_flags); i++)
-    {
-        cl_uint numImageFormats = 0;
-        error = clGetSupportedImageFormats(context, mem_flags[i],
-                                           CL_MEM_OBJECT_IMAGE2D, 0, NULL,
-                                           &numImageFormats);
-        test_error(
-            error,
-            "Unable to query number of CL_MEM_OBJECT_IMAGE2D image formats");
-        std::vector<cl_image_format> imageFormats(numImageFormats);
-        error = clGetSupportedImageFormats(
-            context, mem_flags[i], CL_MEM_OBJECT_IMAGE2D, imageFormats.size(),
-  , NULL);
-        test_error(error,
-                   "Unable to query CL_MEM_OBJECT_IMAGE2D image formats");
-        for (auto& imageFormat : imageFormats)
-        {
-            if (imageFormat.image_channel_order == CL_DEPTH)
-            {
-                totalDepthImageFormats++;
-            }
-        }
-    }
-    bool supports_cl_khr_depth_images =
-        is_extension_available(deviceID, "cl_khr_depth_images");
-    if (totalDepthImageFormats == 0)
-    {
-        test_assert_error(supports_cl_khr_depth_images == false,
-                          "Device does not support Depth Images but does "
-                          "support cl_khr_depth_images");
-    }
-    else
-    {
-        test_assert_error(supports_cl_khr_depth_images,
-                          "Device supports Depth Images but does not support "
-                          "cl_khr_depth_images");
-    }
-    return TEST_PASS;
-int test_consistency_device_and_host_timer(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements)
-    // clGetPlatformInfo, passing CL_PLATFORM_HOST_TIMER_RESOLUTION
-    // May return 0, indicating that platform does not support Device and Host
-    // Timer Synchronization.
-    cl_int error;
-    cl_platform_id platform = NULL;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform),
-                            &platform, NULL);
-    test_error(error, "Unable to query CL_DEVICE_PLATFORM");
-    cl_ulong hostTimerResolution = 0;
-    error = clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION,
-                              sizeof(hostTimerResolution), &hostTimerResolution,
-                              NULL);
-    test_error(error, "Unable to query CL_PLATFORM_HOST_TIMER_RESOLUTION");
-    if (hostTimerResolution == 0)
-    {
-        // clGetDeviceAndHostTimer, clGetHostTimer
-        // Returns CL_INVALID_OPERATION if the platform associated with device
-        // does not support Device and Host Timer Synchronization.
-        cl_ulong dt = 0;
-        cl_ulong ht = 0;
-        error = clGetDeviceAndHostTimer(deviceID, &dt, &ht);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_PLATFORM_HOST_TIMER_RESOLUTION returned 0 but "
-            "clGetDeviceAndHostTimer did not return CL_INVALID_OPERATION");
-        error = clGetHostTimer(deviceID, &ht);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "CL_PLATFORM_HOST_TIMER_RESOLUTION returned 0 but "
-            "clGetHostTimer did not return CL_INVALID_OPERATION");
-    }
-    return TEST_PASS;
-int test_consistency_il_programs(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements)
-    // clGetDeviceInfo, passing CL_DEVICE_IL_VERSION or
-    // May return an empty string and empty array, indicating that device does
-    // not support Intermediate Language Programs.
-    cl_int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    // Even if the device does not support Intermediate Language Programs the
-    // size of the string query should not be zero.
-    size_t sz = SIZE_MAX;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_IL_VERSION, 0, NULL, &sz);
-    test_error(error, "Unable to query CL_DEVICE_IL_VERSION");
-    test_assert_error(sz != 0,
-                      "CL_DEVICE_IL_VERSION should return a non-zero size");
-    std::string ilVersion = get_device_il_version_string(deviceID);
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_ILS_WITH_VERSION, 0, NULL, &sz);
-    test_error(error, "Unable to query CL_DEVICE_ILS_WITH_VERSION");
-    if (ilVersion == "" || sz == 0)
-    {
-        // This probably means that Intermediate Language Programs are not
-        // supported.
-        // Check that both queries are consistent:
-        test_assert_error(
-            ilVersion == "",
-            "CL_DEVICE_IL_VERSION returned a non-empty string but "
-            "CL_DEVICE_ILS_WITH_VERSION returned no supported ILs");
-        test_assert_error(sz == 0,
-                          "CL_DEVICE_ILS_WITH_VERSION returned supported ILs "
-                          "but CL_DEVICE_IL_VERSION returned an empty string");
-        bool supports_cl_khr_il_program =
-            is_extension_available(deviceID, "cl_khr_il_program");
-        test_assert_error(supports_cl_khr_il_program == false,
-                          "Device does not support IL Programs but does "
-                          "support cl_khr_il_program");
-        // Test setup:
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            &test_kernel, "test");
-        test_error(error, "Unable to create test kernel");
-        // clGetProgramInfo, passing CL_PROGRAM_IL
-        // Returns an empty buffer (such as param_value_size_ret equal to 0) if
-        // no devices in the context associated with program support
-        // Intermediate Language Programs.
-        error = clGetProgramInfo(program, CL_PROGRAM_IL, 0, NULL, &sz);
-        test_error(error, "Unable to query CL_PROGRAM_IL");
-        test_assert_error(sz == 0,
-                          "Device does not support IL Programs but "
-                          "CL_PROGRAM_IL returned a non-zero size");
-        // clCreateProgramWithIL
-        // Returns CL_INVALID_OPERATION if no devices in context support
-        // Intermediate Language Programs.
-        cl_uint bogus = 0xDEADBEEF;
-        clProgramWrapper ilProgram =
-            clCreateProgramWithIL(context, &bogus, sizeof(bogus), &error);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "Device does not support IL Programs but clCreateProgramWithIL did "
-            "not return CL_INVALID_OPERATION");
-        // clSetProgramSpecializationConstant
-        // Returns CL_INVALID_OPERATION if no devices associated with program
-        // support Intermediate Language Programs.
-        cl_uint specConst = 42;
-        error = clSetProgramSpecializationConstant(
-            program, 0, sizeof(specConst), &specConst);
-        test_failure_error(error, CL_INVALID_OPERATION,
-                           "Device does not support IL Programs but "
-                           "clSetProgramSpecializationConstant did not return "
-                           "CL_INVALID_OPERATION");
-    }
-    return TEST_PASS;
-int test_consistency_subgroups(cl_device_id deviceID, cl_context context,
-                               cl_command_queue queue, int num_elements)
-    // clGetDeviceInfo, passing CL_DEVICE_MAX_NUM_SUB_GROUPS
-    // May return 0, indicating that device does not support Subgroups.
-    cl_int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    cl_uint maxNumSubGroups = 0;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_NUM_SUB_GROUPS,
-                            sizeof(maxNumSubGroups), &maxNumSubGroups, NULL);
-    test_error(error, "Unable to query CL_DEVICE_MAX_NUM_SUB_GROUPS");
-    if (maxNumSubGroups == 0)
-    {
-        // Test setup:
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            &test_kernel, "test");
-        test_error(error, "Unable to create test kernel");
-        // clGetDeviceInfo, passing
-        // Returns CL_FALSE if device does not support Subgroups.
-        cl_bool ifp = CL_FALSE;
-        error = clGetDeviceInfo(
-            sizeof(ifp), &ifp, NULL);
-        test_error(
-            error,
-        test_assert_error(ifp == CL_FALSE,
-                          "Device does not support Subgroups but "
-                          "did not return CL_FALSE");
-        // clGetDeviceInfo, passing CL_DEVICE_EXTENSIONS
-        // Will not describe support for the cl_khr_subgroups extension if
-        // device does not support Subgroups.
-        bool supports_cl_khr_subgroups =
-            is_extension_available(deviceID, "cl_khr_subgroups");
-        test_assert_error(supports_cl_khr_subgroups == false,
-                          "Device does not support Subgroups but does "
-                          "support cl_khr_subgroups");
-        // clGetKernelSubGroupInfo
-        // Returns CL_INVALID_OPERATION if device does not support Subgroups.
-        size_t sz = SIZE_MAX;
-        error = clGetKernelSubGroupInfo(kernel, deviceID,
-                                        CL_KERNEL_MAX_NUM_SUB_GROUPS, 0, NULL,
-                                        sizeof(sz), &sz, NULL);
-        test_failure_error(
-            error, CL_INVALID_OPERATION,
-            "Device does not support Subgroups but clGetKernelSubGroupInfo did "
-            "not return CL_INVALID_OPERATION");
-    }
-    return TEST_PASS;
-static void CL_CALLBACK program_callback(cl_program, void*) {}
-int test_consistency_prog_ctor_dtor(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements)
-    cl_int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    // Test setup:
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        &test_kernel, "test");
-    test_error(error, "Unable to create test kernel");
-    // clGetProgramInfo, passing CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT or
-    // Returns CL_FALSE if no devices in the context associated with program
-    // support Program Initialization and Clean-Up Kernels.
-    cl_bool b = CL_FALSE;
-    error = clGetProgramInfo(program, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT,
-                             sizeof(b), &b, NULL);
-    test_error(error, "Unable to query CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT");
-    test_assert_error(
-        b == CL_FALSE,
-    error = clGetProgramInfo(program, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT,
-                             sizeof(b), &b, NULL);
-    test_error(error, "Unable to query CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT");
-    test_assert_error(
-        b == CL_FALSE,
-    // clSetProgramReleaseCallback
-    // Returns CL_INVALID_OPERATION if no devices in the context associated with
-    // program support Program Initialization and Clean-Up Kernels.
-    error = clSetProgramReleaseCallback(program, program_callback, NULL);
-    test_failure_error(
-        error, CL_INVALID_OPERATION,
-        "clSetProgramReleaseCallback did not return CL_INVALID_OPERATION");
-    return TEST_PASS;
-int test_consistency_3d_image_writes(cl_device_id deviceID, cl_context context,
-                                     cl_command_queue queue, int num_elements)
-    // clGetSupportedImageFormats, passing CL_MEM_OBJECT_IMAGE3D and one of
-    // Returns an empty set (such as num_image_formats equal to 0),
-    // indicating that no image formats are supported for writing to 3D
-    // image objects, if no devices in context support Writing to 3D Image
-    // Objects.
-    cl_int error;
-    cl_uint total3DImageWriteFormats = 0;
-    const cl_mem_flags mem_flags[] = {
-    };
-    for (int i = 0; i < ARRAY_SIZE(mem_flags); i++)
-    {
-        cl_uint numImageFormats = 0;
-        error = clGetSupportedImageFormats(context, mem_flags[i],
-                                           CL_MEM_OBJECT_IMAGE3D, 0, NULL,
-                                           &numImageFormats);
-        test_error(
-            error,
-            "Unable to query number of CL_MEM_OBJECT_IMAGE3D image formats");
-        total3DImageWriteFormats += numImageFormats;
-    }
-    bool supports_cl_khr_3d_image_writes =
-        is_extension_available(deviceID, "cl_khr_3d_image_writes");
-    if (total3DImageWriteFormats == 0)
-    {
-        // clGetDeviceInfo, passing CL_DEVICE_EXTENSIONS
-        // Will not describe support for the cl_khr_3d_image_writes extension if
-        // device does not support Writing to 3D Image Objects.
-        test_assert_error(supports_cl_khr_3d_image_writes == false,
-                          "Device does not support Writing to 3D Image Objects "
-                          "but does support cl_khr_3d_image_writes");
-    }
-    else
-    {
-        test_assert_error(supports_cl_khr_3d_image_writes,
-                          "Device supports Writing to 3D Image Objects but "
-                          "does not support cl_khr_3d_image_writes");
-    }
-    return TEST_PASS;
diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp
index 9e981cd..4d90211 100644
--- a/test_conformance/api/test_api_min_max.cpp
+++ b/test_conformance/api/test_api_min_max.cpp
@@ -136,8 +136,7 @@
     /* Create some I/O streams */
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * 100, NULL, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 100, NULL, &error );
     if( streams[0] == NULL )
         log_error("ERROR: Creating test array failed!\n");
@@ -322,8 +321,7 @@
     test_error( error, "Failed to create the program and kernel.");
     free( programSrc );
-    result = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float), NULL,
-                            &error);
+    result = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float), NULL, &error);
     test_error( error, "clCreateBufer failed");
     /* Create some I/O streams */
@@ -694,8 +692,7 @@
     /* Just get any ol format to test with */
-    error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D,
-                                   CL_MEM_READ_ONLY, 0, &image_format_desc);
+    error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_WRITE, 0, &image_format_desc );
     test_error( error, "Unable to obtain suitable image format to test with!" );
     /* Get the max 2d image width */
@@ -751,8 +748,7 @@
     /* Just get any ol format to test with */
-    error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D,
-                                   CL_MEM_READ_ONLY, 0, &image_format_desc);
+    error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_WRITE, 0, &image_format_desc );
     test_error( error, "Unable to obtain suitable image format to test with!" );
     /* Get the max 2d image width */
@@ -809,8 +805,7 @@
     /* Just get any ol format to test with */
-    error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D,
-                                   CL_MEM_READ_ONLY, 0, &image_format_desc);
+    error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_WRITE, 0, &image_format_desc );
     test_error( error, "Unable to obtain suitable image format to test with!" );
     /* Get the max 2d image width */
@@ -996,7 +991,6 @@
     size_t decrement;
     cl_event event;
     cl_int event_status;
-    bool embeddedNoLong = gIsEmbedded && !gHasLong;
     /* Get the max param size */
@@ -1010,9 +1004,8 @@
         return -1;
-    /* The embedded profile without cles_khr_int64 extension does not require
-     * longs, so use ints */
-    if (embeddedNoLong)
+    /* The embedded profile does not require longs, so use ints */
+    if(gIsEmbedded)
         numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_int);
         numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_long);
@@ -1028,7 +1021,7 @@
         clMemWrapper mem;
         clKernelWrapper kernel;
-        if (embeddedNoLong)
+        if(gIsEmbedded)
             log_info("Trying a kernel with %ld int arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n",
                      numberOfIntParametersToTry, sizeof(cl_int)*numberOfIntParametersToTry, sizeof(cl_mem),
@@ -1099,8 +1092,7 @@
         /* Try to set a large argument to the kernel */
         retVal = 0;
-        mem = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_long), NULL,
-                             &error);
+        mem = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_long), NULL, &error);
         test_error(error, "clCreateBuffer failed");
         for (i=0; i<(int)numberOfIntParametersToTry; i++) {
@@ -1254,8 +1246,7 @@
     clMemWrapper image = create_image_2d( context, CL_MEM_READ_WRITE, &format, 16, 16, 0, NULL, &error );
     test_error( error, "Unable to create a test image" );
-    clMemWrapper stream =
-        clCreateBuffer(context, CL_MEM_READ_WRITE, 16, NULL, &error);
+    clMemWrapper stream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), 16, NULL, &error );
     test_error( error, "Unable to create test buffer" );
     error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &image );
@@ -1356,11 +1347,9 @@
             constantData[i] = (int)genrand_int32(d);
         clMemWrapper streams[3];
-        streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                    sizeToAllocate, constantData, &error);
+        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeToAllocate, constantData, &error);
         test_error( error, "Creating test array failed" );
-        streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate,
-                                    NULL, &error);
+        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeToAllocate, NULL, &error);
         test_error( error, "Creating test array failed" );
@@ -1524,8 +1513,7 @@
     streams = new clMemWrapper[ maxArgs + 1 ];
     for( i = 0; i < maxArgs + 1; i++ )
-        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    individualBufferSize, NULL, &error);
+        streams[i] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), individualBufferSize, NULL, &error);
         test_error( error, "Creating test array failed" );
@@ -1670,7 +1658,8 @@
     size_t    threads[1], localThreads[1];
     cl_int *localData, *resultData;
     cl_ulong maxSize, kernelLocalUsage, min_max_local_mem_size;
-    Version device_version;
+    cl_char buffer[ 4098 ];
+    size_t length;
     int i;
     int err = 0;
     MTdata d;
@@ -1679,33 +1668,31 @@
     error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( maxSize ), &maxSize, 0 );
     test_error( error, "Unable to get max local buffer size" );
-    try
-    {
-        device_version = get_device_cl_version(deviceID);
-    } catch (const std::runtime_error &e)
-    {
-        log_error("%s", e.what());
-        return -1;
-    }
+    // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*"
+    error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( buffer ), buffer, &length );
+    test_error( error, "Unable to get device version string" );
     if (!gIsEmbedded)
-        if (device_version == Version(1, 0))
+        if( memcmp( buffer, "OpenCL 2.0", strlen( "OpenCL 2.0" ) ) == 0 )
             min_max_local_mem_size = 16L * 1024L;
-        else
+        else if( memcmp( buffer, "OpenCL 2.1", strlen( "OpenCL 2.1" ) ) != 0 )
+            min_max_local_mem_size = 16L * 1024L;
+        else if( memcmp( buffer, "OpenCL 1.2", strlen( "OpenCL 1.2" ) ) != 0 )
+            min_max_local_mem_size = 16L * 1024L;
+        else if( memcmp( buffer, "OpenCL 1.1", strlen( "OpenCL 1.1" ) ) != 0 )
+            min_max_local_mem_size = 16L * 1024L;
+        else if ( memcmp( buffer, "OpenCL 1.0", strlen( "OpenCL 1.0" ) ) != 0 )
             min_max_local_mem_size = 32L * 1024L;
-    }
-    else
-    {
-        min_max_local_mem_size = 1L * 1024L;
+        else
+        {
+            log_error( "ERROR: device version string does not match required format! (returned: %s)\n", (char *)buffer );
+            return -1;
+        }
-    if (maxSize < min_max_local_mem_size)
+    if( maxSize < (gIsEmbedded ? 1L * 1024L : min_max_local_mem_size) )
-        const std::string version_as_string = device_version.to_string();
-        log_error("ERROR: Reported local mem size less than required by OpenCL "
-                  "%s (reported %d KB)\n",
-                  version_as_string.c_str(), (int)(maxSize / 1024L));
+        log_error( "ERROR: Reported local mem size less than required by OpenCL 1.1 (reported %dKb)\n", (int)( maxSize / 1024L ) );
         return -1;
@@ -1734,11 +1721,9 @@
         localData[i] = (int)genrand_int32(d);
     free_mtdata(d); d = NULL;
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate,
-                                localData, &error);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeToAllocate, localData, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate,
-                                NULL, &error);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeToAllocate, NULL, &error);
     test_error( error, "Creating test array failed" );
diff --git a/test_conformance/api/test_clone_kernel.cpp b/test_conformance/api/test_clone_kernel.cpp
index 1a7e67a..1f22781 100644
--- a/test_conformance/api/test_clone_kernel.cpp
+++ b/test_conformance/api/test_clone_kernel.cpp
@@ -113,16 +113,15 @@
     clSamplerWrapper sampler;
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNSIGNED_INT8;
-    cl_image_desc imageDesc;
-    memset(&imageDesc, 0x0, sizeof(cl_image_desc));
+	cl_image_desc imageDesc;
+	memset(&imageDesc, 0x0, sizeof(cl_image_desc));
     imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
     imageDesc.image_width = 512;
     imageDesc.image_height = 512;
     cl_uint color[4] = {1,3,5,7};
-    clProgramWrapper program_read;
-    clProgramWrapper program_write;
+    clProgramWrapper program;
     clKernelWrapper kernel_read;
     clKernelWrapper kernel_write;
     clKernelWrapper kernel_cloned;
@@ -130,16 +129,12 @@
     clMemWrapper img;
-    if (create_single_kernel_helper(context, &program_read, &kernel_read, 1,
-                                    clone_kernel_test_img, "img_read_kernel")
-        != 0)
+    if( create_single_kernel_helper( context, &program, &kernel_read, 1, clone_kernel_test_img, "img_read_kernel" ) != 0 )
         return -1;
-    if (create_single_kernel_helper(context, &program_write, &kernel_write, 1,
-                                    clone_kernel_test_img, "img_write_kernel")
-        != 0)
+    if( create_single_kernel_helper( context, &program, &kernel_write, 1, clone_kernel_test_img, "img_write_kernel" ) != 0 )
         return -1;
@@ -246,8 +241,6 @@
     int error;
     clProgramWrapper program;
-    clProgramWrapper program_buf_read;
-    clProgramWrapper program_buf_write;
     clKernelWrapper kernel;
     clKernelWrapper kernel_pipe_read;
     clKernelWrapper kernel_buf_read;
@@ -279,18 +272,12 @@
         return -1;
-    if (create_single_kernel_helper(context, &program_buf_read,
-                                    &kernel_buf_read, 1,
-                                    clone_kernel_test_kernel, "buf_read_kernel")
-        != 0)
+    if( create_single_kernel_helper( context, &program, &kernel_buf_read, 1, clone_kernel_test_kernel, "buf_read_kernel" ) != 0 )
         return -1;
-    if (create_single_kernel_helper(
-            context, &program_buf_write, &kernel_buf_write, 1,
-            clone_kernel_test_kernel, "buf_write_kernel")
-        != 0)
+    if( create_single_kernel_helper( context, &program, &kernel_buf_write, 1, clone_kernel_test_kernel, "buf_write_kernel" ) != 0 )
         return -1;
diff --git a/test_conformance/api/test_context_destructor_callback.cpp b/test_conformance/api/test_context_destructor_callback.cpp
deleted file mode 100644
index 1d73a3c..0000000
--- a/test_conformance/api/test_context_destructor_callback.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "testBase.h"
-static volatile cl_int sDestructorIndex;
-void CL_CALLBACK context_destructor_callback(cl_context context, void *userData)
-    int *userPtr = (int *)userData;
-    // ordering of callbacks is guaranteed, meaning we don't need to do atomic
-    // operation here
-    *userPtr = ++sDestructorIndex;
-int test_context_destructor_callback(cl_device_id deviceID, cl_context context,
-                                     cl_command_queue queue, int num_elements)
-    cl_int error;
-    clContextWrapper localContext =
-        clCreateContext(NULL, 1, &deviceID, NULL, NULL, &error);
-    test_error(error, "Unable to create local context");
-    // Set up some variables to catch the order in which callbacks are called
-    volatile int callbackOrders[3] = { 0, 0, 0 };
-    sDestructorIndex = 0;
-    // Set up the callbacks
-    error = clSetContextDestructorCallback(
-        localContext, context_destructor_callback, (void *)&callbackOrders[0]);
-    test_error(error, "Unable to set destructor callback");
-    error = clSetContextDestructorCallback(
-        localContext, context_destructor_callback, (void *)&callbackOrders[1]);
-    test_error(error, "Unable to set destructor callback");
-    error = clSetContextDestructorCallback(
-        localContext, context_destructor_callback, (void *)&callbackOrders[2]);
-    test_error(error, "Unable to set destructor callback");
-    // Now release the context, which SHOULD call the callbacks
-    error = clReleaseContext(localContext);
-    test_error(error, "Unable to release local context");
-    // Note: since we manually released the context, we need to set it to NULL
-    // to prevent a double-release
-    localContext = NULL;
-    // At this point, all three callbacks should have already been called
-    int numErrors = 0;
-    for (int i = 0; i < 3; i++)
-    {
-        // Spin waiting for the release to finish.  If you don't call the
-        // context_destructor_callback, you will not pass the test.
-        log_info("\tWaiting for callback %d...\n", i);
-        int wait = 0;
-        while (0 == callbackOrders[i])
-        {
-            usleep(100000); // 1/10th second
-            if (++wait >= 10 * 10)
-            {
-                log_error("\tERROR: Callback %d was not called within 10 "
-                          "seconds!  Assuming failure.\n",
-                          i + 1);
-                numErrors++;
-                break;
-            }
-        }
-        if (callbackOrders[i] != 3 - i)
-        {
-            log_error("\tERROR: Callback %d was called in the wrong order! "
-                      "(Was called order %d, should have been order %d)\n",
-                      i + 1, callbackOrders[i], 3 - i);
-            numErrors++;
-        }
-    }
-    return (numErrors > 0) ? TEST_FAIL : TEST_PASS;
diff --git a/test_conformance/api/test_create_context_from_type.cpp b/test_conformance/api/test_create_context_from_type.cpp
index b67041f..bbc2c86 100644
--- a/test_conformance/api/test_create_context_from_type.cpp
+++ b/test_conformance/api/test_create_context_from_type.cpp
@@ -80,11 +80,9 @@
     /* Create some I/O streams */
-    streams[0] = clCreateBuffer(context_to_test, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * 10, NULL, &error);
+    streams[0] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 10, NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context_to_test, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * 10, NULL, &error);
+    streams[1] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 10, NULL, &error);
     test_error( error, "Creating test array failed" );
     /* Write some test data */
diff --git a/test_conformance/api/test_create_kernels.cpp b/test_conformance/api/test_create_kernels.cpp
index 568e84c..59f7f0a 100644
--- a/test_conformance/api/test_create_kernels.cpp
+++ b/test_conformance/api/test_create_kernels.cpp
@@ -456,8 +456,7 @@
     // Create args
     count = 100;
-    output = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * count,
-                            NULL, &error);
+    output = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * count, NULL, &error );
     test_error( error, "Unable to create output buffer" );
     error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &output );
@@ -525,10 +524,11 @@
         local_queue = clCreateCommandQueue(local_context, deviceID, 0, &error);
         test_error( error, "clCreateCommandQueue failed");
-        error = create_single_kernel_helper(
-            local_context, &local_program, &local_kernel, 1,
-            &repeate_test_kernel, "test_kernel");
-        test_error(error, "Unable to create kernel");
+        error = create_single_kernel_helper(local_context, &local_program, NULL, 1, &repeate_test_kernel, NULL);
+        test_error( error, "Unable to build test program" );
+        local_kernel = clCreateKernel(local_program, "test_kernel", &error);
+        test_error( error, "clCreateKernel failed");
         local_mem_in = clCreateBuffer(local_context, CL_MEM_READ_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
         test_error( error, "clCreateBuffer failed");
diff --git a/test_conformance/api/test_kernel_arg_changes.cpp b/test_conformance/api/test_kernel_arg_changes.cpp
index eb798a9..d85ae99 100644
--- a/test_conformance/api/test_kernel_arg_changes.cpp
+++ b/test_conformance/api/test_kernel_arg_changes.cpp
@@ -74,16 +74,14 @@
         sizes[ i ][ 0 ] = genrand_int32(seed) % (maxWidth/32) + 1;
         sizes[ i ][ 1 ] = genrand_int32(seed) % (maxHeight/32) + 1;
-        images[i] = create_image_2d(context, CL_MEM_READ_ONLY, &imageFormat,
-                                    sizes[i][0], sizes[i][1], 0, NULL, &error);
+        images[ i ] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY),
+                                     &imageFormat, sizes[ i ][ 0], sizes[ i ][ 1 ], 0, NULL, &error );
         if( images[i] == NULL )
             log_error("Failed to create image %d of size %d x %d (%s).\n", i, (int)sizes[i][0], (int)sizes[i][1], IGetErrorString( error ));
             return -1;
-        results[i] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_int) * threads[0] * 2, NULL, &error);
+        results[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof( cl_int ) * threads[0] * 2, NULL, &error );
         if( results[i] == NULL)
             log_error("Failed to create array %d of size %d.\n", i, (int)threads[0]*2);
diff --git a/test_conformance/api/test_kernel_arg_info.cpp b/test_conformance/api/test_kernel_arg_info.cpp
index 8073e0d..f1039ae 100644
--- a/test_conformance/api/test_kernel_arg_info.cpp
+++ b/test_conformance/api/test_kernel_arg_info.cpp
@@ -1,6 +1,6 @@
-// Copyright (c) 2021 The Khronos Group Inc.
+// Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,992 +13,5931 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include <iostream>
-#include <vector>
 #include "testBase.h"
-#include "harness/errorHelpers.h"
-#include "harness/typeWrappers.h"
-#include "harness/kernelHelpers.h"
+#include <limits.h>
+#include <ctype.h>
+#ifndef _WIN32
+#include <unistd.h>
-#define MINIMUM_OPENCL_PIPE_VERSION Version(2, 0)
+#define ARG_INFO_FIELD_COUNT        5
-static constexpr size_t CL_VERSION_LENGTH = 128;
-static constexpr size_t KERNEL_ARGUMENT_LENGTH = 128;
-static constexpr char KERNEL_ARGUMENT_NAME[] = "argument";
-static constexpr size_t KERNEL_ARGUMENT_NAME_LENGTH =
-    sizeof(KERNEL_ARGUMENT_NAME) + 1;
-static constexpr int SINGLE_KERNEL_ARG_NUMBER = 0;
-static constexpr int MAX_NUMBER_OF_KERNEL_ARGS = 128;
+#define ARG_INFO_ADDR_OFFSET        1
+#define ARG_INFO_ACCESS_OFFSET        2
-static const std::vector<cl_kernel_arg_address_qualifier> address_qualifiers = {
+typedef char const * kernel_args_t[];
+kernel_args_t required_kernel_args = {
+    "typedef float4 typedef_type;\n"
+    "\n"
+    "typedef struct struct_type {\n"
+    "    float4 float4d;\n"
+    "    int intd;\n"
+    "} typedef_struct_type;\n"
+    "\n"
+    "typedef union union_type {\n"
+    "    float4 float4d;\n"
+    "    uint4 uint4d;\n"
+    "} typedef_union_type;\n"
+    "\n"
+    "typedef enum enum_type {\n"
+    "    enum_type_zero,\n"
+    "    enum_type_one,\n"
+    "    enum_type_two\n"
+    "} typedef_enum_type;\n"
+    "\n"
+    "kernel void constant_scalar_p0(constant void*constantvoidp,\n"
+    "                              constant char *constantcharp,\n"
+    "                              constant uchar* constantucharp,\n"
+    "                              constant unsigned char * constantunsignedcharp)\n"
+  "{}\n",
+    "kernel void constant_scalar_p1(constant short*constantshortp,\n"
+    "                              constant ushort *constantushortp,\n"
+    "                              constant unsigned short* constantunsignedshortp,\n"
+    "                              constant int * constantintp)\n"
+  "{}\n",
+    "kernel void constant_scalar_p2(constant uint*constantuintp,\n"
+    "                              constant unsigned int *constantunsignedintp)\n"
+  "{}\n",
+    "kernel void constant_scalar_p3(constant float *constantfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_scalar_restrict_p0(constant void* restrict constantvoidrestrictp,\n"
+    "                                       constant char * restrict constantcharrestrictp,\n"
+    "                                       constant uchar*restrict constantucharrestrictp,\n"
+    "                                       constant unsigned char *restrict constantunsignedcharrestrictp)\n"
+    "{}\n",
+    "kernel void constant_scalar_restrict_p1(constant short* restrict constantshortrestrictp,\n"
+    "                                       constant ushort * restrict constantushortrestrictp,\n"
+    "                                       constant unsigned short*restrict constantunsignedshortrestrictp,\n"
+    "                                       constant int *restrict constantintrestrictp)\n"
+    "{}\n",
+    "kernel void constant_scalar_restrict_p2(constant uint* restrict constantuintrestrictp,\n"
+    "                                       constant unsigned int * restrict constantunsignedintrestrictp)\n"
+    "{}\n",
+    "kernel void constant_scalar_restrict_p3(constant float * restrict constantfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_scalar_p(global void*globalvoidp,\n"
+    "                            global char *globalcharp,\n"
+    "                            global uchar* globalucharp,\n"
+    "                            global unsigned char * globalunsignedcharp,\n"
+    "                            global short*globalshortp,\n"
+    "                            global ushort *globalushortp,\n"
+    "                            global unsigned short* globalunsignedshortp,\n"
+    "                            global int * globalintp,\n"
+    "                            global uint*globaluintp,\n"
+    "                            global unsigned int *globalunsignedintp,\n"
+    "                            global float *globalfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_scalar_restrict_p(global void* restrict globalvoidrestrictp,\n"
+    "                                     global char * restrict globalcharrestrictp,\n"
+    "                                     global uchar*restrict globalucharrestrictp,\n"
+    "                                     global unsigned char *restrict globalunsignedcharrestrictp,\n"
+    "                                     global short* restrict globalshortrestrictp,\n"
+    "                                     global ushort * restrict globalushortrestrictp,\n"
+    "                                     global unsigned short*restrict globalunsignedshortrestrictp,\n"
+    "                                     global int *restrict globalintrestrictp,\n"
+    "                                     global uint* restrict globaluintrestrictp,\n"
+    "                                     global unsigned int * restrict globalunsignedintrestrictp,\n"
+    "                                     global float * restrict globalfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_scalar_p(global const void*globalconstvoidp,\n"
+    "                                  global const char *globalconstcharp,\n"
+    "                                  global const uchar* globalconstucharp,\n"
+    "                                  global const unsigned char * globalconstunsignedcharp,\n"
+    "                                  global const short*globalconstshortp,\n"
+    "                                  global const ushort *globalconstushortp,\n"
+    "                                  global const unsigned short* globalconstunsignedshortp,\n"
+    "                                  global const int * globalconstintp,\n"
+    "                                  global const uint*globalconstuintp,\n"
+    "                                  global const unsigned int *globalconstunsignedintp,\n"
+    "                                  global const float *globalconstfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_scalar_restrict_p(global const void* restrict globalconstvoidrestrictp,\n"
+    "                                           global const char * restrict globalconstcharrestrictp,\n"
+    "                                           global const uchar*restrict globalconstucharrestrictp,\n"
+    "                                           global const unsigned char *restrict globalconstunsignedcharrestrictp,\n"
+    "                                           global const short* restrict globalconstshortrestrictp,\n"
+    "                                           global const ushort * restrict globalconstushortrestrictp,\n"
+    "                                           global const unsigned short*restrict globalconstunsignedshortrestrictp,\n"
+    "                                           global const int *restrict globalconstintrestrictp,\n"
+    "                                           global const uint* restrict globalconstuintrestrictp,\n"
+    "                                           global const unsigned int * restrict globalconstunsignedintrestrictp,\n"
+    "                                           global const float * restrict globalconstfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_scalar_p(global volatile void*globalvolatilevoidp,\n"
+    "                                     global volatile char *globalvolatilecharp,\n"
+    "                                     global volatile uchar* globalvolatileucharp,\n"
+    "                                     global volatile unsigned char * globalvolatileunsignedcharp,\n"
+    "                                     global volatile short*globalvolatileshortp,\n"
+    "                                     global volatile ushort *globalvolatileushortp,\n"
+    "                                     global volatile unsigned short* globalvolatileunsignedshortp,\n"
+    "                                     global volatile int * globalvolatileintp,\n"
+    "                                     global volatile uint*globalvolatileuintp,\n"
+    "                                     global volatile unsigned int *globalvolatileunsignedintp,\n"
+    "                                     global volatile float *globalvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_scalar_restrict_p(global volatile void* restrict globalvolatilevoidrestrictp,\n"
+    "                                              global volatile char * restrict globalvolatilecharrestrictp,\n"
+    "                                              global volatile uchar*restrict globalvolatileucharrestrictp,\n"
+    "                                              global volatile unsigned char *restrict globalvolatileunsignedcharrestrictp,\n"
+    "                                              global volatile short* restrict globalvolatileshortrestrictp,\n"
+    "                                              global volatile ushort * restrict globalvolatileushortrestrictp,\n"
+    "                                              global volatile unsigned short*restrict globalvolatileunsignedshortrestrictp,\n"
+    "                                              global volatile int *restrict globalvolatileintrestrictp,\n"
+    "                                              global volatile uint* restrict globalvolatileuintrestrictp,\n"
+    "                                              global volatile unsigned int * restrict globalvolatileunsignedintrestrictp,\n"
+    "                                              global volatile float * restrict globalvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_scalar_p(global const volatile void*globalconstvolatilevoidp,\n"
+    "                                           global const volatile char *globalconstvolatilecharp,\n"
+    "                                           global const volatile uchar* globalconstvolatileucharp,\n"
+    "                                           global const volatile unsigned char * globalconstvolatileunsignedcharp,\n"
+    "                                           global const volatile short*globalconstvolatileshortp,\n"
+    "                                           global const volatile ushort *globalconstvolatileushortp,\n"
+    "                                           global const volatile unsigned short* globalconstvolatileunsignedshortp,\n"
+    "                                           global const volatile int * globalconstvolatileintp,\n"
+    "                                           global const volatile uint*globalconstvolatileuintp,\n"
+    "                                           global const volatile unsigned int *globalconstvolatileunsignedintp,\n"
+    "                                           global const volatile float *globalconstvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_scalar_restrict_p(global const volatile void* restrict globalconstvolatilevoidrestrictp,\n"
+    "                                                    global const volatile char * restrict globalconstvolatilecharrestrictp,\n"
+    "                                                    global const volatile uchar*restrict globalconstvolatileucharrestrictp,\n"
+    "                                                    global const volatile unsigned char *restrict globalconstvolatileunsignedcharrestrictp,\n"
+    "                                                    global const volatile short* restrict globalconstvolatileshortrestrictp,\n"
+    "                                                    global const volatile ushort * restrict globalconstvolatileushortrestrictp,\n"
+    "                                                    global const volatile unsigned short*restrict globalconstvolatileunsignedshortrestrictp,\n"
+    "                                                    global const volatile int *restrict globalconstvolatileintrestrictp,\n"
+    "                                                    global const volatile uint* restrict globalconstvolatileuintrestrictp,\n"
+    "                                                    global const volatile unsigned int * restrict globalconstvolatileunsignedintrestrictp,\n"
+    "                                                    global const volatile float * restrict globalconstvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_scalar_p(local void*localvoidp,\n"
+    "                           local char *localcharp,\n"
+    "                           local uchar* localucharp,\n"
+    "                           local unsigned char * localunsignedcharp,\n"
+    "                           local short*localshortp,\n"
+    "                           local ushort *localushortp,\n"
+    "                           local unsigned short* localunsignedshortp,\n"
+    "                           local int * localintp,\n"
+    "                           local uint*localuintp,\n"
+    "                           local unsigned int *localunsignedintp,\n"
+    "                           local float *localfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_scalar_restrict_p(local void* restrict localvoidrestrictp,\n"
+    "                                    local char * restrict localcharrestrictp,\n"
+    "                                    local uchar*restrict localucharrestrictp,\n"
+    "                                    local unsigned char *restrict localunsignedcharrestrictp,\n"
+    "                                    local short* restrict localshortrestrictp,\n"
+    "                                    local ushort * restrict localushortrestrictp,\n"
+    "                                    local unsigned short*restrict localunsignedshortrestrictp,\n"
+    "                                    local int *restrict localintrestrictp,\n"
+    "                                    local uint* restrict localuintrestrictp,\n"
+    "                                    local unsigned int * restrict localunsignedintrestrictp,\n"
+    "                                    local float * restrict localfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_scalar_p(local const void*localconstvoidp,\n"
+    "                                 local const char *localconstcharp,\n"
+    "                                 local const uchar* localconstucharp,\n"
+    "                                 local const unsigned char * localconstunsignedcharp,\n"
+    "                                 local const short*localconstshortp,\n"
+    "                                 local const ushort *localconstushortp,\n"
+    "                                 local const unsigned short* localconstunsignedshortp,\n"
+    "                                 local const int * localconstintp,\n"
+    "                                 local const uint*localconstuintp,\n"
+    "                                 local const unsigned int *localconstunsignedintp,\n"
+    "                                 local const float *localconstfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_scalar_restrict_p(local const void* restrict localconstvoidrestrictp,\n"
+    "                                          local const char * restrict localconstcharrestrictp,\n"
+    "                                          local const uchar*restrict localconstucharrestrictp,\n"
+    "                                          local const unsigned char *restrict localconstunsignedcharrestrictp,\n"
+    "                                          local const short* restrict localconstshortrestrictp,\n"
+    "                                          local const ushort * restrict localconstushortrestrictp,\n"
+    "                                          local const unsigned short*restrict localconstunsignedshortrestrictp,\n"
+    "                                          local const int *restrict localconstintrestrictp,\n"
+    "                                          local const uint* restrict localconstuintrestrictp,\n"
+    "                                          local const unsigned int * restrict localconstunsignedintrestrictp,\n"
+    "                                          local const float * restrict localconstfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_scalar_p(local volatile void*localvolatilevoidp,\n"
+    "                                    local volatile char *localvolatilecharp,\n"
+    "                                    local volatile uchar* localvolatileucharp,\n"
+    "                                    local volatile unsigned char * localvolatileunsignedcharp,\n"
+    "                                    local volatile short*localvolatileshortp,\n"
+    "                                    local volatile ushort *localvolatileushortp,\n"
+    "                                    local volatile unsigned short* localvolatileunsignedshortp,\n"
+    "                                    local volatile int * localvolatileintp,\n"
+    "                                    local volatile uint*localvolatileuintp,\n"
+    "                                    local volatile unsigned int *localvolatileunsignedintp,\n"
+    "                                    local volatile float *localvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_scalar_restrict_p(local volatile void* restrict localvolatilevoidrestrictp,\n"
+    "                                             local volatile char * restrict localvolatilecharrestrictp,\n"
+    "                                             local volatile uchar*restrict localvolatileucharrestrictp,\n"
+    "                                             local volatile unsigned char *restrict localvolatileunsignedcharrestrictp,\n"
+    "                                             local volatile short* restrict localvolatileshortrestrictp,\n"
+    "                                             local volatile ushort * restrict localvolatileushortrestrictp,\n"
+    "                                             local volatile unsigned short*restrict localvolatileunsignedshortrestrictp,\n"
+    "                                             local volatile int *restrict localvolatileintrestrictp,\n"
+    "                                             local volatile uint* restrict localvolatileuintrestrictp,\n"
+    "                                             local volatile unsigned int * restrict localvolatileunsignedintrestrictp,\n"
+    "                                             local volatile float * restrict localvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_scalar_p(local const volatile void*localconstvolatilevoidp,\n"
+    "                                          local const volatile char *localconstvolatilecharp,\n"
+    "                                          local const volatile uchar* localconstvolatileucharp,\n"
+    "                                          local const volatile unsigned char * localconstvolatileunsignedcharp,\n"
+    "                                          local const volatile short*localconstvolatileshortp,\n"
+    "                                          local const volatile ushort *localconstvolatileushortp,\n"
+    "                                          local const volatile unsigned short* localconstvolatileunsignedshortp,\n"
+    "                                          local const volatile int * localconstvolatileintp,\n"
+    "                                          local const volatile uint*localconstvolatileuintp,\n"
+    "                                          local const volatile unsigned int *localconstvolatileunsignedintp,\n"
+    "                                          local const volatile float *localconstvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_scalar_restrict_p(local const volatile void* restrict localconstvolatilevoidrestrictp,\n"
+    "                                                   local const volatile char * restrict localconstvolatilecharrestrictp,\n"
+    "                                                   local const volatile uchar*restrict localconstvolatileucharrestrictp,\n"
+    "                                                   local const volatile unsigned char *restrict localconstvolatileunsignedcharrestrictp,\n"
+    "                                                   local const volatile short* restrict localconstvolatileshortrestrictp,\n"
+    "                                                   local const volatile ushort * restrict localconstvolatileushortrestrictp,\n"
+    "                                                   local const volatile unsigned short*restrict localconstvolatileunsignedshortrestrictp,\n"
+    "                                                   local const volatile int *restrict localconstvolatileintrestrictp,\n"
+    "                                                   local const volatile uint* restrict localconstvolatileuintrestrictp,\n"
+    "                                                   local const volatile unsigned int * restrict localconstvolatileunsignedintrestrictp,\n"
+    "                                                   local const volatile float * restrict localconstvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void scalar_d(char chard,\n"
+    "                     uchar uchard,\n"
+    "                     unsigned char unsignedchard,\n"
+    "                     short shortd,\n"
+    "                     ushort ushortd,\n"
+    "                     unsigned short unsignedshortd,\n"
+    "                     int intd,\n"
+    "                     uint uintd,\n"
+    "                     unsigned int unsignedintd,\n"
+    "                     float floatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_scalar_d(const char constchard,\n"
+    "                           const uchar constuchard,\n"
+    "                           const unsigned char constunsignedchard,\n"
+    "                           const short constshortd,\n"
+    "                           const ushort constushortd,\n"
+    "                           const unsigned short constunsignedshortd,\n"
+    "                           const int constintd,\n"
+    "                           const uint constuintd,\n"
+    "                           const unsigned int constunsignedintd,\n"
+    "                           const float constfloatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_scalar_d(private char privatechard,\n"
+    "                             private uchar privateuchard,\n"
+    "                             private unsigned char privateunsignedchard,\n"
+    "                             private short privateshortd,\n"
+    "                             private ushort privateushortd,\n"
+    "                             private unsigned short privateunsignedshortd,\n"
+    "                             private int privateintd,\n"
+    "                             private uint privateuintd,\n"
+    "                             private unsigned int privateunsignedintd,\n"
+    "                             private float privatefloatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_scalar_d(private const char privateconstchard,\n"
+    "                                   private const uchar privateconstuchard,\n"
+    "                                   private const unsigned char privateconstunsignedchard,\n"
+    "                                   private const short privateconstshortd,\n"
+    "                                   private const ushort privateconstushortd,\n"
+    "                                   private const unsigned short privateconstunsignedshortd,\n"
+    "                                   private const int privateconstintd,\n"
+    "                                   private const uint privateconstuintd,\n"
+    "                                   private const unsigned int privateconstunsignedintd,\n"
+    "                                   private const float privateconstfloatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector2_p0(constant char2*constantchar2p,\n"
+    "                               constant uchar2 *constantuchar2p,\n"
+    "                               constant short2* constantshort2p,\n"
+    "                               constant ushort2 * constantushort2p)\n"
+  "{}\n",
+    "\n"
+    "kernel void constant_vector2_p1(constant int2*constantint2p,\n"
+    "                               constant uint2 *constantuint2p)\n"
+  "{}\n",
+    "\n"
+    "kernel void constant_vector2_p2(constant float2*constantfloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector2_restrict_p0(constant char2 *restrict constantchar2restrictp,\n"
+    "                                        constant uchar2* restrict constantuchar2restrictp,\n"
+    "                                        constant short2 * restrict constantshort2restrictp,\n"
+    "                                        constant ushort2*restrict constantushort2restrictp)\n"
+  "{}\n",
+    "\n"
+    "kernel void constant_vector2_restrict_p1(constant int2 *restrict constantint2restrictp,\n"
+    "                                        constant uint2* restrict constantuint2restrictp)\n"
+  "{}\n",
+    "\n"
+    "kernel void constant_vector2_restrict_p2(constant float2 *restrict constantfloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector2_p(global char2*globalchar2p,\n"
+    "                             global uchar2 *globaluchar2p,\n"
+    "                             global short2* globalshort2p,\n"
+    "                             global ushort2 * globalushort2p,\n"
+    "                             global int2*globalint2p,\n"
+    "                             global uint2 *globaluint2p,\n"
+    "                             global float2*globalfloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector2_restrict_p(global char2 *restrict globalchar2restrictp,\n"
+    "                                      global uchar2* restrict globaluchar2restrictp,\n"
+    "                                      global short2 * restrict globalshort2restrictp,\n"
+    "                                      global ushort2*restrict globalushort2restrictp,\n"
+    "                                      global int2 *restrict globalint2restrictp,\n"
+    "                                      global uint2* restrict globaluint2restrictp,\n"
+    "                                      global float2 *restrict globalfloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector2_p(global const char2* globalconstchar2p,\n"
+    "                                   global const uchar2 * globalconstuchar2p,\n"
+    "                                   global const short2*globalconstshort2p,\n"
+    "                                   global const ushort2 *globalconstushort2p,\n"
+    "                                   global const int2* globalconstint2p,\n"
+    "                                   global const uint2 * globalconstuint2p,\n"
+    "                                   global const float2* globalconstfloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector2_restrict_p(global const char2 * restrict globalconstchar2restrictp,\n"
+    "                                            global const uchar2*restrict globalconstuchar2restrictp,\n"
+    "                                            global const short2 *restrict globalconstshort2restrictp,\n"
+    "                                            global const ushort2* restrict globalconstushort2restrictp,\n"
+    "                                            global const int2 * restrict globalconstint2restrictp,\n"
+    "                                            global const uint2*restrict globalconstuint2restrictp,\n"
+    "                                            global const float2 * restrict globalconstfloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector2_p(global volatile char2*globalvolatilechar2p,\n"
+    "                                      global volatile uchar2 *globalvolatileuchar2p,\n"
+    "                                      global volatile short2* globalvolatileshort2p,\n"
+    "                                      global volatile ushort2 * globalvolatileushort2p,\n"
+    "                                      global volatile int2*globalvolatileint2p,\n"
+    "                                      global volatile uint2 *globalvolatileuint2p,\n"
+    "                                      global volatile float2*globalvolatilefloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector2_restrict_p(global volatile char2 *restrict globalvolatilechar2restrictp,\n"
+    "                                               global volatile uchar2* restrict globalvolatileuchar2restrictp,\n"
+    "                                               global volatile short2 * restrict globalvolatileshort2restrictp,\n"
+    "                                               global volatile ushort2*restrict globalvolatileushort2restrictp,\n"
+    "                                               global volatile int2 *restrict globalvolatileint2restrictp,\n"
+    "                                               global volatile uint2* restrict globalvolatileuint2restrictp,\n"
+    "                                               global volatile float2 *restrict globalvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector2_p(global const volatile char2* globalconstvolatilechar2p,\n"
+    "                                            global const volatile uchar2 * globalconstvolatileuchar2p,\n"
+    "                                            global const volatile short2*globalconstvolatileshort2p,\n"
+    "                                            global const volatile ushort2 *globalconstvolatileushort2p,\n"
+    "                                            global const volatile int2* globalconstvolatileint2p,\n"
+    "                                            global const volatile uint2 * globalconstvolatileuint2p,\n"
+    "                                            global const volatile float2* globalconstvolatilefloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector2_restrict_p(global const volatile char2 * restrict globalconstvolatilechar2restrictp,\n"
+    "                                                     global const volatile uchar2*restrict globalconstvolatileuchar2restrictp,\n"
+    "                                                     global const volatile short2 *restrict globalconstvolatileshort2restrictp,\n"
+    "                                                     global const volatile ushort2* restrict globalconstvolatileushort2restrictp,\n"
+    "                                                     global const volatile int2 * restrict globalconstvolatileint2restrictp,\n"
+    "                                                     global const volatile uint2*restrict globalconstvolatileuint2restrictp,\n"
+    "                                                     global const volatile float2 * restrict globalconstvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector2_p(local char2*localchar2p,\n"
+    "                            local uchar2 *localuchar2p,\n"
+    "                            local short2* localshort2p,\n"
+    "                            local ushort2 * localushort2p,\n"
+    "                            local int2*localint2p,\n"
+    "                            local uint2 *localuint2p,\n"
+    "                            local float2*localfloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector2_restrict_p(local char2 *restrict localchar2restrictp,\n"
+    "                                     local uchar2* restrict localuchar2restrictp,\n"
+    "                                     local short2 * restrict localshort2restrictp,\n"
+    "                                     local ushort2*restrict localushort2restrictp,\n"
+    "                                     local int2 *restrict localint2restrictp,\n"
+    "                                     local uint2* restrict localuint2restrictp,\n"
+    "                                     local float2 *restrict localfloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector2_p(local const char2* localconstchar2p,\n"
+    "                                  local const uchar2 * localconstuchar2p,\n"
+    "                                  local const short2*localconstshort2p,\n"
+    "                                  local const ushort2 *localconstushort2p,\n"
+    "                                  local const int2* localconstint2p,\n"
+    "                                  local const uint2 * localconstuint2p,\n"
+    "                                  local const float2* localconstfloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector2_restrict_p(local const char2 * restrict localconstchar2restrictp,\n"
+    "                                           local const uchar2*restrict localconstuchar2restrictp,\n"
+    "                                           local const short2 *restrict localconstshort2restrictp,\n"
+    "                                           local const ushort2* restrict localconstushort2restrictp,\n"
+    "                                           local const int2 * restrict localconstint2restrictp,\n"
+    "                                           local const uint2*restrict localconstuint2restrictp,\n"
+    "                                           local const float2 * restrict localconstfloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector2_p(local volatile char2*localvolatilechar2p,\n"
+    "                                     local volatile uchar2 *localvolatileuchar2p,\n"
+    "                                     local volatile short2* localvolatileshort2p,\n"
+    "                                     local volatile ushort2 * localvolatileushort2p,\n"
+    "                                     local volatile int2*localvolatileint2p,\n"
+    "                                     local volatile uint2 *localvolatileuint2p,\n"
+    "                                     local volatile float2*localvolatilefloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector2_restrict_p(local volatile char2 *restrict localvolatilechar2restrictp,\n"
+    "                                              local volatile uchar2* restrict localvolatileuchar2restrictp,\n"
+    "                                              local volatile short2 * restrict localvolatileshort2restrictp,\n"
+    "                                              local volatile ushort2*restrict localvolatileushort2restrictp,\n"
+    "                                              local volatile int2 *restrict localvolatileint2restrictp,\n"
+    "                                              local volatile uint2* restrict localvolatileuint2restrictp,\n"
+    "                                              local volatile float2 *restrict localvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector2_p(local const volatile char2* localconstvolatilechar2p,\n"
+    "                                           local const volatile uchar2 * localconstvolatileuchar2p,\n"
+    "                                           local const volatile short2*localconstvolatileshort2p,\n"
+    "                                           local const volatile ushort2 *localconstvolatileushort2p,\n"
+    "                                           local const volatile int2* localconstvolatileint2p,\n"
+    "                                           local const volatile uint2 * localconstvolatileuint2p,\n"
+    "                                           local const volatile float2* localconstvolatilefloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector2_restrict_p(local const volatile char2 * restrict localconstvolatilechar2restrictp,\n"
+    "                                                    local const volatile uchar2*restrict localconstvolatileuchar2restrictp,\n"
+    "                                                    local const volatile short2 *restrict localconstvolatileshort2restrictp,\n"
+    "                                                    local const volatile ushort2* restrict localconstvolatileushort2restrictp,\n"
+    "                                                    local const volatile int2 * restrict localconstvolatileint2restrictp,\n"
+    "                                                    local const volatile uint2*restrict localconstvolatileuint2restrictp,\n"
+    "                                                    local const volatile float2 * restrict localconstvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector2_d(char2 char2d,\n"
+    "                      uchar2 uchar2d,\n"
+    "                      short2 short2d,\n"
+    "                      ushort2 ushort2d,\n"
+    "                      int2 int2d,\n"
+    "                      uint2 uint2d,\n"
+    "                      float2 float2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector2_d(const char2 constchar2d,\n"
+    "                            const uchar2 constuchar2d,\n"
+    "                            const short2 constshort2d,\n"
+    "                            const ushort2 constushort2d,\n"
+    "                            const int2 constint2d,\n"
+    "                            const uint2 constuint2d,\n"
+    "                            const float2 constfloat2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector2_d(private char2 privatechar2d,\n"
+    "                              private uchar2 privateuchar2d,\n"
+    "                              private short2 privateshort2d,\n"
+    "                              private ushort2 privateushort2d,\n"
+    "                              private int2 privateint2d,\n"
+    "                              private uint2 privateuint2d,\n"
+    "                              private float2 privatefloat2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector2_d(private const char2 privateconstchar2d,\n"
+    "                                    private const uchar2 privateconstuchar2d,\n"
+    "                                    private const short2 privateconstshort2d,\n"
+    "                                    private const ushort2 privateconstushort2d,\n"
+    "                                    private const int2 privateconstint2d,\n"
+    "                                    private const uint2 privateconstuint2d,\n"
+    "                                    private const float2 privateconstfloat2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_p0(constant char3*constantchar3p,\n"
+    "                               constant uchar3 *constantuchar3p,\n"
+    "                               constant short3* constantshort3p,\n"
+    "                               constant ushort3 * constantushort3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_p1(constant int3*constantint3p,\n"
+    "                               constant uint3 *constantuint3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_p2(constant float3*constantfloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_restrict_p0(constant char3 *restrict constantchar3restrictp,\n"
+    "                                        constant uchar3* restrict constantuchar3restrictp,\n"
+    "                                        constant short3 * restrict constantshort3restrictp,\n"
+    "                                        constant ushort3*restrict constantushort3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_restrict_p1(constant int3 *restrict constantint3restrictp,\n"
+    "                                        constant uint3* restrict constantuint3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_restrict_p2(constant float3 *restrict constantfloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector3_p(global char3*globalchar3p,\n"
+    "                             global uchar3 *globaluchar3p,\n"
+    "                             global short3* globalshort3p,\n"
+    "                             global ushort3 * globalushort3p,\n"
+    "                             global int3*globalint3p,\n"
+    "                             global uint3 *globaluint3p,\n"
+    "                             global float3*globalfloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector3_restrict_p(global char3 *restrict globalchar3restrictp,\n"
+    "                                      global uchar3* restrict globaluchar3restrictp,\n"
+    "                                      global short3 * restrict globalshort3restrictp,\n"
+    "                                      global ushort3*restrict globalushort3restrictp,\n"
+    "                                      global int3 *restrict globalint3restrictp,\n"
+    "                                      global uint3* restrict globaluint3restrictp,\n"
+    "                                      global float3 *restrict globalfloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector3_p(global const char3* globalconstchar3p,\n"
+    "                                   global const uchar3 * globalconstuchar3p,\n"
+    "                                   global const short3*globalconstshort3p,\n"
+    "                                   global const ushort3 *globalconstushort3p,\n"
+    "                                   global const int3* globalconstint3p,\n"
+    "                                   global const uint3 * globalconstuint3p,\n"
+    "                                   global const float3* globalconstfloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector3_restrict_p(global const char3 * restrict globalconstchar3restrictp,\n"
+    "                                            global const uchar3*restrict globalconstuchar3restrictp,\n"
+    "                                            global const short3 *restrict globalconstshort3restrictp,\n"
+    "                                            global const ushort3* restrict globalconstushort3restrictp,\n"
+    "                                            global const int3 * restrict globalconstint3restrictp,\n"
+    "                                            global const uint3*restrict globalconstuint3restrictp,\n"
+    "                                            global const float3 * restrict globalconstfloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector3_p(global volatile char3*globalvolatilechar3p,\n"
+    "                                      global volatile uchar3 *globalvolatileuchar3p,\n"
+    "                                      global volatile short3* globalvolatileshort3p,\n"
+    "                                      global volatile ushort3 * globalvolatileushort3p,\n"
+    "                                      global volatile int3*globalvolatileint3p,\n"
+    "                                      global volatile uint3 *globalvolatileuint3p,\n"
+    "                                      global volatile float3*globalvolatilefloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector3_restrict_p(global volatile char3 *restrict globalvolatilechar3restrictp,\n"
+    "                                               global volatile uchar3* restrict globalvolatileuchar3restrictp,\n"
+    "                                               global volatile short3 * restrict globalvolatileshort3restrictp,\n"
+    "                                               global volatile ushort3*restrict globalvolatileushort3restrictp,\n"
+    "                                               global volatile int3 *restrict globalvolatileint3restrictp,\n"
+    "                                               global volatile uint3* restrict globalvolatileuint3restrictp,\n"
+    "                                               global volatile float3 *restrict globalvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector3_p(global const volatile char3* globalconstvolatilechar3p,\n"
+    "                                            global const volatile uchar3 * globalconstvolatileuchar3p,\n"
+    "                                            global const volatile short3*globalconstvolatileshort3p,\n"
+    "                                            global const volatile ushort3 *globalconstvolatileushort3p,\n"
+    "                                            global const volatile int3* globalconstvolatileint3p,\n"
+    "                                            global const volatile uint3 * globalconstvolatileuint3p,\n"
+    "                                            global const volatile float3* globalconstvolatilefloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector3_restrict_p(global const volatile char3 * restrict globalconstvolatilechar3restrictp,\n"
+    "                                                     global const volatile uchar3*restrict globalconstvolatileuchar3restrictp,\n"
+    "                                                     global const volatile short3 *restrict globalconstvolatileshort3restrictp,\n"
+    "                                                     global const volatile ushort3* restrict globalconstvolatileushort3restrictp,\n"
+    "                                                     global const volatile int3 * restrict globalconstvolatileint3restrictp,\n"
+    "                                                     global const volatile uint3*restrict globalconstvolatileuint3restrictp,\n"
+    "                                                     global const volatile float3 * restrict globalconstvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector3_p(local char3*localchar3p,\n"
+    "                            local uchar3 *localuchar3p,\n"
+    "                            local short3* localshort3p,\n"
+    "                            local ushort3 * localushort3p,\n"
+    "                            local int3*localint3p,\n"
+    "                            local uint3 *localuint3p,\n"
+    "                            local float3*localfloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector3_restrict_p(local char3 *restrict localchar3restrictp,\n"
+    "                                     local uchar3* restrict localuchar3restrictp,\n"
+    "                                     local short3 * restrict localshort3restrictp,\n"
+    "                                     local ushort3*restrict localushort3restrictp,\n"
+    "                                     local int3 *restrict localint3restrictp,\n"
+    "                                     local uint3* restrict localuint3restrictp,\n"
+    "                                     local float3 *restrict localfloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector3_p(local const char3* localconstchar3p,\n"
+    "                                  local const uchar3 * localconstuchar3p,\n"
+    "                                  local const short3*localconstshort3p,\n"
+    "                                  local const ushort3 *localconstushort3p,\n"
+    "                                  local const int3* localconstint3p,\n"
+    "                                  local const uint3 * localconstuint3p,\n"
+    "                                  local const float3* localconstfloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector3_restrict_p(local const char3 * restrict localconstchar3restrictp,\n"
+    "                                           local const uchar3*restrict localconstuchar3restrictp,\n"
+    "                                           local const short3 *restrict localconstshort3restrictp,\n"
+    "                                           local const ushort3* restrict localconstushort3restrictp,\n"
+    "                                           local const int3 * restrict localconstint3restrictp,\n"
+    "                                           local const uint3*restrict localconstuint3restrictp,\n"
+    "                                           local const float3 * restrict localconstfloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector3_p(local volatile char3*localvolatilechar3p,\n"
+    "                                     local volatile uchar3 *localvolatileuchar3p,\n"
+    "                                     local volatile short3* localvolatileshort3p,\n"
+    "                                     local volatile ushort3 * localvolatileushort3p,\n"
+    "                                     local volatile int3*localvolatileint3p,\n"
+    "                                     local volatile uint3 *localvolatileuint3p,\n"
+    "                                     local volatile float3*localvolatilefloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector3_restrict_p(local volatile char3 *restrict localvolatilechar3restrictp,\n"
+    "                                              local volatile uchar3* restrict localvolatileuchar3restrictp,\n"
+    "                                              local volatile short3 * restrict localvolatileshort3restrictp,\n"
+    "                                              local volatile ushort3*restrict localvolatileushort3restrictp,\n"
+    "                                              local volatile int3 *restrict localvolatileint3restrictp,\n"
+    "                                              local volatile uint3* restrict localvolatileuint3restrictp,\n"
+    "                                              local volatile float3 *restrict localvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector3_p(local const volatile char3* localconstvolatilechar3p,\n"
+    "                                           local const volatile uchar3 * localconstvolatileuchar3p,\n"
+    "                                           local const volatile short3*localconstvolatileshort3p,\n"
+    "                                           local const volatile ushort3 *localconstvolatileushort3p,\n"
+    "                                           local const volatile int3* localconstvolatileint3p,\n"
+    "                                           local const volatile uint3 * localconstvolatileuint3p,\n"
+    "                                           local const volatile float3* localconstvolatilefloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector3_restrict_p(local const volatile char3 * restrict localconstvolatilechar3restrictp,\n"
+    "                                                    local const volatile uchar3*restrict localconstvolatileuchar3restrictp,\n"
+    "                                                    local const volatile short3 *restrict localconstvolatileshort3restrictp,\n"
+    "                                                    local const volatile ushort3* restrict localconstvolatileushort3restrictp,\n"
+    "                                                    local const volatile int3 * restrict localconstvolatileint3restrictp,\n"
+    "                                                    local const volatile uint3*restrict localconstvolatileuint3restrictp,\n"
+    "                                                    local const volatile float3 * restrict localconstvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector3_d(char3 char3d,\n"
+    "                      uchar3 uchar3d,\n"
+    "                      short3 short3d,\n"
+    "                      ushort3 ushort3d,\n"
+    "                      int3 int3d,\n"
+    "                      uint3 uint3d,\n"
+    "                      float3 float3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector3_d(const char3 constchar3d,\n"
+    "                            const uchar3 constuchar3d,\n"
+    "                            const short3 constshort3d,\n"
+    "                            const ushort3 constushort3d,\n"
+    "                            const int3 constint3d,\n"
+    "                            const uint3 constuint3d,\n"
+    "                            const float3 constfloat3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector3_d(private char3 privatechar3d,\n"
+    "                              private uchar3 privateuchar3d,\n"
+    "                              private short3 privateshort3d,\n"
+    "                              private ushort3 privateushort3d,\n"
+    "                              private int3 privateint3d,\n"
+    "                              private uint3 privateuint3d,\n"
+    "                              private float3 privatefloat3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector3_d(private const char3 privateconstchar3d,\n"
+    "                                    private const uchar3 privateconstuchar3d,\n"
+    "                                    private const short3 privateconstshort3d,\n"
+    "                                    private const ushort3 privateconstushort3d,\n"
+    "                                    private const int3 privateconstint3d,\n"
+    "                                    private const uint3 privateconstuint3d,\n"
+    "                                    private const float3 privateconstfloat3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_p0(constant char4*constantchar4p,\n"
+    "                               constant uchar4 *constantuchar4p,\n"
+    "                               constant short4* constantshort4p,\n"
+    "                               constant ushort4 * constantushort4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_p1(constant int4*constantint4p,\n"
+    "                               constant uint4 *constantuint4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_p2(constant float4*constantfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_restrict_p0(constant char4 *restrict constantchar4restrictp,\n"
+    "                                        constant uchar4* restrict constantuchar4restrictp,\n"
+    "                                        constant short4 * restrict constantshort4restrictp,\n"
+    "                                        constant ushort4*restrict constantushort4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_restrict_p1(constant int4 *restrict constantint4restrictp,\n"
+    "                                        constant uint4* restrict constantuint4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_restrict_p2(constant float4 *restrict constantfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector4_p(global char4*globalchar4p,\n"
+    "                             global uchar4 *globaluchar4p,\n"
+    "                             global short4* globalshort4p,\n"
+    "                             global ushort4 * globalushort4p,\n"
+    "                             global int4*globalint4p,\n"
+    "                             global uint4 *globaluint4p,\n"
+    "                             global float4*globalfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector4_restrict_p(global char4 *restrict globalchar4restrictp,\n"
+    "                                      global uchar4* restrict globaluchar4restrictp,\n"
+    "                                      global short4 * restrict globalshort4restrictp,\n"
+    "                                      global ushort4*restrict globalushort4restrictp,\n"
+    "                                      global int4 *restrict globalint4restrictp,\n"
+    "                                      global uint4* restrict globaluint4restrictp,\n"
+    "                                      global float4 *restrict globalfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector4_p(global const char4* globalconstchar4p,\n"
+    "                                   global const uchar4 * globalconstuchar4p,\n"
+    "                                   global const short4*globalconstshort4p,\n"
+    "                                   global const ushort4 *globalconstushort4p,\n"
+    "                                   global const int4* globalconstint4p,\n"
+    "                                   global const uint4 * globalconstuint4p,\n"
+    "                                   global const float4* globalconstfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector4_restrict_p(global const char4 * restrict globalconstchar4restrictp,\n"
+    "                                            global const uchar4*restrict globalconstuchar4restrictp,\n"
+    "                                            global const short4 *restrict globalconstshort4restrictp,\n"
+    "                                            global const ushort4* restrict globalconstushort4restrictp,\n"
+    "                                            global const int4 * restrict globalconstint4restrictp,\n"
+    "                                            global const uint4*restrict globalconstuint4restrictp,\n"
+    "                                            global const float4 * restrict globalconstfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector4_p(global volatile char4*globalvolatilechar4p,\n"
+    "                                      global volatile uchar4 *globalvolatileuchar4p,\n"
+    "                                      global volatile short4* globalvolatileshort4p,\n"
+    "                                      global volatile ushort4 * globalvolatileushort4p,\n"
+    "                                      global volatile int4*globalvolatileint4p,\n"
+    "                                      global volatile uint4 *globalvolatileuint4p,\n"
+    "                                      global volatile float4*globalvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector4_restrict_p(global volatile char4 *restrict globalvolatilechar4restrictp,\n"
+    "                                               global volatile uchar4* restrict globalvolatileuchar4restrictp,\n"
+    "                                               global volatile short4 * restrict globalvolatileshort4restrictp,\n"
+    "                                               global volatile ushort4*restrict globalvolatileushort4restrictp,\n"
+    "                                               global volatile int4 *restrict globalvolatileint4restrictp,\n"
+    "                                               global volatile uint4* restrict globalvolatileuint4restrictp,\n"
+    "                                               global volatile float4 *restrict globalvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector4_p(global const volatile char4* globalconstvolatilechar4p,\n"
+    "                                            global const volatile uchar4 * globalconstvolatileuchar4p,\n"
+    "                                            global const volatile short4*globalconstvolatileshort4p,\n"
+    "                                            global const volatile ushort4 *globalconstvolatileushort4p,\n"
+    "                                            global const volatile int4* globalconstvolatileint4p,\n"
+    "                                            global const volatile uint4 * globalconstvolatileuint4p,\n"
+    "                                            global const volatile float4* globalconstvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector4_restrict_p(global const volatile char4 * restrict globalconstvolatilechar4restrictp,\n"
+    "                                                     global const volatile uchar4*restrict globalconstvolatileuchar4restrictp,\n"
+    "                                                     global const volatile short4 *restrict globalconstvolatileshort4restrictp,\n"
+    "                                                     global const volatile ushort4* restrict globalconstvolatileushort4restrictp,\n"
+    "                                                     global const volatile int4 * restrict globalconstvolatileint4restrictp,\n"
+    "                                                     global const volatile uint4*restrict globalconstvolatileuint4restrictp,\n"
+    "                                                     global const volatile float4 * restrict globalconstvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector4_p(local char4*localchar4p,\n"
+    "                            local uchar4 *localuchar4p,\n"
+    "                            local short4* localshort4p,\n"
+    "                            local ushort4 * localushort4p,\n"
+    "                            local int4*localint4p,\n"
+    "                            local uint4 *localuint4p,\n"
+    "                            local float4*localfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector4_restrict_p(local char4 *restrict localchar4restrictp,\n"
+    "                                     local uchar4* restrict localuchar4restrictp,\n"
+    "                                     local short4 * restrict localshort4restrictp,\n"
+    "                                     local ushort4*restrict localushort4restrictp,\n"
+    "                                     local int4 *restrict localint4restrictp,\n"
+    "                                     local uint4* restrict localuint4restrictp,\n"
+    "                                     local float4 *restrict localfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector4_p(local const char4* localconstchar4p,\n"
+    "                                  local const uchar4 * localconstuchar4p,\n"
+    "                                  local const short4*localconstshort4p,\n"
+    "                                  local const ushort4 *localconstushort4p,\n"
+    "                                  local const int4* localconstint4p,\n"
+    "                                  local const uint4 * localconstuint4p,\n"
+    "                                  local const float4* localconstfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector4_restrict_p(local const char4 * restrict localconstchar4restrictp,\n"
+    "                                           local const uchar4*restrict localconstuchar4restrictp,\n"
+    "                                           local const short4 *restrict localconstshort4restrictp,\n"
+    "                                           local const ushort4* restrict localconstushort4restrictp,\n"
+    "                                           local const int4 * restrict localconstint4restrictp,\n"
+    "                                           local const uint4*restrict localconstuint4restrictp,\n"
+    "                                           local const float4 * restrict localconstfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector4_p(local volatile char4*localvolatilechar4p,\n"
+    "                                     local volatile uchar4 *localvolatileuchar4p,\n"
+    "                                     local volatile short4* localvolatileshort4p,\n"
+    "                                     local volatile ushort4 * localvolatileushort4p,\n"
+    "                                     local volatile int4*localvolatileint4p,\n"
+    "                                     local volatile uint4 *localvolatileuint4p,\n"
+    "                                     local volatile float4*localvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector4_restrict_p(local volatile char4 *restrict localvolatilechar4restrictp,\n"
+    "                                              local volatile uchar4* restrict localvolatileuchar4restrictp,\n"
+    "                                              local volatile short4 * restrict localvolatileshort4restrictp,\n"
+    "                                              local volatile ushort4*restrict localvolatileushort4restrictp,\n"
+    "                                              local volatile int4 *restrict localvolatileint4restrictp,\n"
+    "                                              local volatile uint4* restrict localvolatileuint4restrictp,\n"
+    "                                              local volatile float4 *restrict localvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector4_p(local const volatile char4* localconstvolatilechar4p,\n"
+    "                                           local const volatile uchar4 * localconstvolatileuchar4p,\n"
+    "                                           local const volatile short4*localconstvolatileshort4p,\n"
+    "                                           local const volatile ushort4 *localconstvolatileushort4p,\n"
+    "                                           local const volatile int4* localconstvolatileint4p,\n"
+    "                                           local const volatile uint4 * localconstvolatileuint4p,\n"
+    "                                           local const volatile float4* localconstvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector4_restrict_p(local const volatile char4 * restrict localconstvolatilechar4restrictp,\n"
+    "                                                    local const volatile uchar4*restrict localconstvolatileuchar4restrictp,\n"
+    "                                                    local const volatile short4 *restrict localconstvolatileshort4restrictp,\n"
+    "                                                    local const volatile ushort4* restrict localconstvolatileushort4restrictp,\n"
+    "                                                    local const volatile int4 * restrict localconstvolatileint4restrictp,\n"
+    "                                                    local const volatile uint4*restrict localconstvolatileuint4restrictp,\n"
+    "                                                    local const volatile float4 * restrict localconstvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector4_d(char4 char4d,\n"
+    "                      uchar4 uchar4d,\n"
+    "                      short4 short4d,\n"
+    "                      ushort4 ushort4d,\n"
+    "                      int4 int4d,\n"
+    "                      uint4 uint4d,\n"
+    "                      float4 float4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector4_d(const char4 constchar4d,\n"
+    "                            const uchar4 constuchar4d,\n"
+    "                            const short4 constshort4d,\n"
+    "                            const ushort4 constushort4d,\n"
+    "                            const int4 constint4d,\n"
+    "                            const uint4 constuint4d,\n"
+    "                            const float4 constfloat4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector4_d(private char4 privatechar4d,\n"
+    "                              private uchar4 privateuchar4d,\n"
+    "                              private short4 privateshort4d,\n"
+    "                              private ushort4 privateushort4d,\n"
+    "                              private int4 privateint4d,\n"
+    "                              private uint4 privateuint4d,\n"
+    "                              private float4 privatefloat4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector4_d(private const char4 privateconstchar4d,\n"
+    "                                    private const uchar4 privateconstuchar4d,\n"
+    "                                    private const short4 privateconstshort4d,\n"
+    "                                    private const ushort4 privateconstushort4d,\n"
+    "                                    private const int4 privateconstint4d,\n"
+    "                                    private const uint4 privateconstuint4d,\n"
+    "                                    private const float4 privateconstfloat4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_p0(constant char8*constantchar8p,\n"
+    "                               constant uchar8 *constantuchar8p,\n"
+    "                               constant short8* constantshort8p,\n"
+    "                               constant ushort8 * constantushort8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_p1(constant int8*constantint8p,\n"
+    "                               constant uint8 *constantuint8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_p2(constant float8*constantfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_restrict_p0(constant char8 *restrict constantchar8restrictp,\n"
+    "                                        constant uchar8* restrict constantuchar8restrictp,\n"
+    "                                        constant short8 * restrict constantshort8restrictp,\n"
+    "                                        constant ushort8*restrict constantushort8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_restrict_p1(constant int8 *restrict constantint8restrictp,\n"
+    "                                        constant uint8* restrict constantuint8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_restrict_p2(constant float8 *restrict constantfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector8_p(global char8*globalchar8p,\n"
+    "                             global uchar8 *globaluchar8p,\n"
+    "                             global short8* globalshort8p,\n"
+    "                             global ushort8 * globalushort8p,\n"
+    "                             global int8*globalint8p,\n"
+    "                             global uint8 *globaluint8p,\n"
+    "                             global float8*globalfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector8_restrict_p(global char8 *restrict globalchar8restrictp,\n"
+    "                                      global uchar8* restrict globaluchar8restrictp,\n"
+    "                                      global short8 * restrict globalshort8restrictp,\n"
+    "                                      global ushort8*restrict globalushort8restrictp,\n"
+    "                                      global int8 *restrict globalint8restrictp,\n"
+    "                                      global uint8* restrict globaluint8restrictp,\n"
+    "                                      global float8 *restrict globalfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector8_p(global const char8* globalconstchar8p,\n"
+    "                                   global const uchar8 * globalconstuchar8p,\n"
+    "                                   global const short8*globalconstshort8p,\n"
+    "                                   global const ushort8 *globalconstushort8p,\n"
+    "                                   global const int8* globalconstint8p,\n"
+    "                                   global const uint8 * globalconstuint8p,\n"
+    "                                   global const float8* globalconstfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector8_restrict_p(global const char8 * restrict globalconstchar8restrictp,\n"
+    "                                            global const uchar8*restrict globalconstuchar8restrictp,\n"
+    "                                            global const short8 *restrict globalconstshort8restrictp,\n"
+    "                                            global const ushort8* restrict globalconstushort8restrictp,\n"
+    "                                            global const int8 * restrict globalconstint8restrictp,\n"
+    "                                            global const uint8*restrict globalconstuint8restrictp,\n"
+    "                                            global const float8 * restrict globalconstfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector8_p(global volatile char8*globalvolatilechar8p,\n"
+    "                                      global volatile uchar8 *globalvolatileuchar8p,\n"
+    "                                      global volatile short8* globalvolatileshort8p,\n"
+    "                                      global volatile ushort8 * globalvolatileushort8p,\n"
+    "                                      global volatile int8*globalvolatileint8p,\n"
+    "                                      global volatile uint8 *globalvolatileuint8p,\n"
+    "                                      global volatile float8*globalvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector8_restrict_p(global volatile char8 *restrict globalvolatilechar8restrictp,\n"
+    "                                               global volatile uchar8* restrict globalvolatileuchar8restrictp,\n"
+    "                                               global volatile short8 * restrict globalvolatileshort8restrictp,\n"
+    "                                               global volatile ushort8*restrict globalvolatileushort8restrictp,\n"
+    "                                               global volatile int8 *restrict globalvolatileint8restrictp,\n"
+    "                                               global volatile uint8* restrict globalvolatileuint8restrictp,\n"
+    "                                               global volatile float8 *restrict globalvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector8_p(global const volatile char8* globalconstvolatilechar8p,\n"
+    "                                            global const volatile uchar8 * globalconstvolatileuchar8p,\n"
+    "                                            global const volatile short8*globalconstvolatileshort8p,\n"
+    "                                            global const volatile ushort8 *globalconstvolatileushort8p,\n"
+    "                                            global const volatile int8* globalconstvolatileint8p,\n"
+    "                                            global const volatile uint8 * globalconstvolatileuint8p,\n"
+    "                                            global const volatile float8* globalconstvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector8_restrict_p(global const volatile char8 * restrict globalconstvolatilechar8restrictp,\n"
+    "                                                     global const volatile uchar8*restrict globalconstvolatileuchar8restrictp,\n"
+    "                                                     global const volatile short8 *restrict globalconstvolatileshort8restrictp,\n"
+    "                                                     global const volatile ushort8* restrict globalconstvolatileushort8restrictp,\n"
+    "                                                     global const volatile int8 * restrict globalconstvolatileint8restrictp,\n"
+    "                                                     global const volatile uint8*restrict globalconstvolatileuint8restrictp,\n"
+    "                                                     global const volatile float8 * restrict globalconstvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector8_p(local char8*localchar8p,\n"
+    "                            local uchar8 *localuchar8p,\n"
+    "                            local short8* localshort8p,\n"
+    "                            local ushort8 * localushort8p,\n"
+    "                            local int8*localint8p,\n"
+    "                            local uint8 *localuint8p,\n"
+    "                            local float8*localfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector8_restrict_p(local char8 *restrict localchar8restrictp,\n"
+    "                                     local uchar8* restrict localuchar8restrictp,\n"
+    "                                     local short8 * restrict localshort8restrictp,\n"
+    "                                     local ushort8*restrict localushort8restrictp,\n"
+    "                                     local int8 *restrict localint8restrictp,\n"
+    "                                     local uint8* restrict localuint8restrictp,\n"
+    "                                     local float8 *restrict localfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector8_p(local const char8* localconstchar8p,\n"
+    "                                  local const uchar8 * localconstuchar8p,\n"
+    "                                  local const short8*localconstshort8p,\n"
+    "                                  local const ushort8 *localconstushort8p,\n"
+    "                                  local const int8* localconstint8p,\n"
+    "                                  local const uint8 * localconstuint8p,\n"
+    "                                  local const float8* localconstfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector8_restrict_p(local const char8 * restrict localconstchar8restrictp,\n"
+    "                                           local const uchar8*restrict localconstuchar8restrictp,\n"
+    "                                           local const short8 *restrict localconstshort8restrictp,\n"
+    "                                           local const ushort8* restrict localconstushort8restrictp,\n"
+    "                                           local const int8 * restrict localconstint8restrictp,\n"
+    "                                           local const uint8*restrict localconstuint8restrictp,\n"
+    "                                           local const float8 * restrict localconstfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector8_p(local volatile char8*localvolatilechar8p,\n"
+    "                                     local volatile uchar8 *localvolatileuchar8p,\n"
+    "                                     local volatile short8* localvolatileshort8p,\n"
+    "                                     local volatile ushort8 * localvolatileushort8p,\n"
+    "                                     local volatile int8*localvolatileint8p,\n"
+    "                                     local volatile uint8 *localvolatileuint8p,\n"
+    "                                     local volatile float8*localvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector8_restrict_p(local volatile char8 *restrict localvolatilechar8restrictp,\n"
+    "                                              local volatile uchar8* restrict localvolatileuchar8restrictp,\n"
+    "                                              local volatile short8 * restrict localvolatileshort8restrictp,\n"
+    "                                              local volatile ushort8*restrict localvolatileushort8restrictp,\n"
+    "                                              local volatile int8 *restrict localvolatileint8restrictp,\n"
+    "                                              local volatile uint8* restrict localvolatileuint8restrictp,\n"
+    "                                              local volatile float8 *restrict localvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector8_p(local const volatile char8* localconstvolatilechar8p,\n"
+    "                                           local const volatile uchar8 * localconstvolatileuchar8p,\n"
+    "                                           local const volatile short8*localconstvolatileshort8p,\n"
+    "                                           local const volatile ushort8 *localconstvolatileushort8p,\n"
+    "                                           local const volatile int8* localconstvolatileint8p,\n"
+    "                                           local const volatile uint8 * localconstvolatileuint8p,\n"
+    "                                           local const volatile float8* localconstvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector8_restrict_p(local const volatile char8 * restrict localconstvolatilechar8restrictp,\n"
+    "                                                    local const volatile uchar8*restrict localconstvolatileuchar8restrictp,\n"
+    "                                                    local const volatile short8 *restrict localconstvolatileshort8restrictp,\n"
+    "                                                    local const volatile ushort8* restrict localconstvolatileushort8restrictp,\n"
+    "                                                    local const volatile int8 * restrict localconstvolatileint8restrictp,\n"
+    "                                                    local const volatile uint8*restrict localconstvolatileuint8restrictp,\n"
+    "                                                    local const volatile float8 * restrict localconstvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector8_d(char8 char8d,\n"
+    "                      uchar8 uchar8d,\n"
+    "                      short8 short8d,\n"
+    "                      ushort8 ushort8d,\n"
+    "                      int8 int8d,\n"
+    "                      uint8 uint8d,\n"
+    "                      float8 float8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector8_d(const char8 constchar8d,\n"
+    "                            const uchar8 constuchar8d,\n"
+    "                            const short8 constshort8d,\n"
+    "                            const ushort8 constushort8d,\n"
+    "                            const int8 constint8d,\n"
+    "                            const uint8 constuint8d,\n"
+    "                            const float8 constfloat8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector8_d(private char8 privatechar8d,\n"
+    "                              private uchar8 privateuchar8d,\n"
+    "                              private short8 privateshort8d,\n"
+    "                              private ushort8 privateushort8d,\n"
+    "                              private int8 privateint8d,\n"
+    "                              private uint8 privateuint8d,\n"
+    "                              private float8 privatefloat8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector8_d(private const char8 privateconstchar8d,\n"
+    "                                    private const uchar8 privateconstuchar8d,\n"
+    "                                    private const short8 privateconstshort8d,\n"
+    "                                    private const ushort8 privateconstushort8d,\n"
+    "                                    private const int8 privateconstint8d,\n"
+    "                                    private const uint8 privateconstuint8d,\n"
+    "                                    private const float8 privateconstfloat8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_p0(constant char16*constantchar16p,\n"
+    "                                constant uchar16 *constantuchar16p,\n"
+    "                                constant short16* constantshort16p,\n"
+    "                                constant ushort16 * constantushort16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_p1(constant int16*constantint16p,\n"
+    "                                constant uint16 *constantuint16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_p2(constant float16*constantfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_restrict_p0(constant char16 *restrict constantchar16restrictp,\n"
+    "                                         constant uchar16* restrict constantuchar16restrictp,\n"
+    "                                         constant short16 * restrict constantshort16restrictp,\n"
+    "                                         constant ushort16*restrict constantushort16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_restrict_p1(constant int16 *restrict constantint16restrictp,\n"
+    "                                         constant uint16* restrict constantuint16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_restrict_p2(constant float16 *restrict constantfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector16_p(global char16*globalchar16p,\n"
+    "                              global uchar16 *globaluchar16p,\n"
+    "                              global short16* globalshort16p,\n"
+    "                              global ushort16 * globalushort16p,\n"
+    "                              global int16*globalint16p,\n"
+    "                              global uint16 *globaluint16p,\n"
+    "                              global float16*globalfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector16_restrict_p(global char16 *restrict globalchar16restrictp,\n"
+    "                                       global uchar16* restrict globaluchar16restrictp,\n"
+    "                                       global short16 * restrict globalshort16restrictp,\n"
+    "                                       global ushort16*restrict globalushort16restrictp,\n"
+    "                                       global int16 *restrict globalint16restrictp,\n"
+    "                                       global uint16* restrict globaluint16restrictp,\n"
+    "                                       global float16 *restrict globalfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector16_p(global const char16* globalconstchar16p,\n"
+    "                                    global const uchar16 * globalconstuchar16p,\n"
+    "                                    global const short16*globalconstshort16p,\n"
+    "                                    global const ushort16 *globalconstushort16p,\n"
+    "                                    global const int16* globalconstint16p,\n"
+    "                                    global const uint16 * globalconstuint16p,\n"
+    "                                    global const float16* globalconstfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector16_restrict_p(global const char16 * restrict globalconstchar16restrictp,\n"
+    "                                             global const uchar16*restrict globalconstuchar16restrictp,\n"
+    "                                             global const short16 *restrict globalconstshort16restrictp,\n"
+    "                                             global const ushort16* restrict globalconstushort16restrictp,\n"
+    "                                             global const int16 * restrict globalconstint16restrictp,\n"
+    "                                             global const uint16*restrict globalconstuint16restrictp,\n"
+    "                                             global const float16 * restrict globalconstfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector16_p(global volatile char16*globalvolatilechar16p,\n"
+    "                                       global volatile uchar16 *globalvolatileuchar16p,\n"
+    "                                       global volatile short16* globalvolatileshort16p,\n"
+    "                                       global volatile ushort16 * globalvolatileushort16p,\n"
+    "                                       global volatile int16*globalvolatileint16p,\n"
+    "                                       global volatile uint16 *globalvolatileuint16p,\n"
+    "                                       global volatile float16*globalvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector16_restrict_p(global volatile char16 *restrict globalvolatilechar16restrictp,\n"
+    "                                                global volatile uchar16* restrict globalvolatileuchar16restrictp,\n"
+    "                                                global volatile short16 * restrict globalvolatileshort16restrictp,\n"
+    "                                                global volatile ushort16*restrict globalvolatileushort16restrictp,\n"
+    "                                                global volatile int16 *restrict globalvolatileint16restrictp,\n"
+    "                                                global volatile uint16* restrict globalvolatileuint16restrictp,\n"
+    "                                                global volatile float16 *restrict globalvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector16_p(global const volatile char16* globalconstvolatilechar16p,\n"
+    "                                             global const volatile uchar16 * globalconstvolatileuchar16p,\n"
+    "                                             global const volatile short16*globalconstvolatileshort16p,\n"
+    "                                             global const volatile ushort16 *globalconstvolatileushort16p,\n"
+    "                                             global const volatile int16* globalconstvolatileint16p,\n"
+    "                                             global const volatile uint16 * globalconstvolatileuint16p,\n"
+    "                                             global const volatile float16* globalconstvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector16_restrict_p(global const volatile char16 * restrict globalconstvolatilechar16restrictp,\n"
+    "                                                      global const volatile uchar16*restrict globalconstvolatileuchar16restrictp,\n"
+    "                                                      global const volatile short16 *restrict globalconstvolatileshort16restrictp,\n"
+    "                                                      global const volatile ushort16* restrict globalconstvolatileushort16restrictp,\n"
+    "                                                      global const volatile int16 * restrict globalconstvolatileint16restrictp,\n"
+    "                                                      global const volatile uint16*restrict globalconstvolatileuint16restrictp,\n"
+    "                                                      global const volatile float16 * restrict globalconstvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector16_p(local char16*localchar16p,\n"
+    "                             local uchar16 *localuchar16p,\n"
+    "                             local short16* localshort16p,\n"
+    "                             local ushort16 * localushort16p,\n"
+    "                             local int16*localint16p,\n"
+    "                             local uint16 *localuint16p,\n"
+    "                             local float16*localfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector16_restrict_p(local char16 *restrict localchar16restrictp,\n"
+    "                                      local uchar16* restrict localuchar16restrictp,\n"
+    "                                      local short16 * restrict localshort16restrictp,\n"
+    "                                      local ushort16*restrict localushort16restrictp,\n"
+    "                                      local int16 *restrict localint16restrictp,\n"
+    "                                      local uint16* restrict localuint16restrictp,\n"
+    "                                      local float16 *restrict localfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector16_p(local const char16* localconstchar16p,\n"
+    "                                   local const uchar16 * localconstuchar16p,\n"
+    "                                   local const short16*localconstshort16p,\n"
+    "                                   local const ushort16 *localconstushort16p,\n"
+    "                                   local const int16* localconstint16p,\n"
+    "                                   local const uint16 * localconstuint16p,\n"
+    "                                   local const float16* localconstfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector16_restrict_p(local const char16 * restrict localconstchar16restrictp,\n"
+    "                                            local const uchar16*restrict localconstuchar16restrictp,\n"
+    "                                            local const short16 *restrict localconstshort16restrictp,\n"
+    "                                            local const ushort16* restrict localconstushort16restrictp,\n"
+    "                                            local const int16 * restrict localconstint16restrictp,\n"
+    "                                            local const uint16*restrict localconstuint16restrictp,\n"
+    "                                            local const float16 * restrict localconstfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector16_p(local volatile char16*localvolatilechar16p,\n"
+    "                                      local volatile uchar16 *localvolatileuchar16p,\n"
+    "                                      local volatile short16* localvolatileshort16p,\n"
+    "                                      local volatile ushort16 * localvolatileushort16p,\n"
+    "                                      local volatile int16*localvolatileint16p,\n"
+    "                                      local volatile uint16 *localvolatileuint16p,\n"
+    "                                      local volatile float16*localvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector16_restrict_p(local volatile char16 *restrict localvolatilechar16restrictp,\n"
+    "                                               local volatile uchar16* restrict localvolatileuchar16restrictp,\n"
+    "                                               local volatile short16 * restrict localvolatileshort16restrictp,\n"
+    "                                               local volatile ushort16*restrict localvolatileushort16restrictp,\n"
+    "                                               local volatile int16 *restrict localvolatileint16restrictp,\n"
+    "                                               local volatile uint16* restrict localvolatileuint16restrictp,\n"
+    "                                               local volatile float16 *restrict localvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector16_p(local const volatile char16* localconstvolatilechar16p,\n"
+    "                                            local const volatile uchar16 * localconstvolatileuchar16p,\n"
+    "                                            local const volatile short16*localconstvolatileshort16p,\n"
+    "                                            local const volatile ushort16 *localconstvolatileushort16p,\n"
+    "                                            local const volatile int16* localconstvolatileint16p,\n"
+    "                                            local const volatile uint16 * localconstvolatileuint16p,\n"
+    "                                            local const volatile float16* localconstvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector16_restrict_p(local const volatile char16 * restrict localconstvolatilechar16restrictp,\n"
+    "                                                     local const volatile uchar16*restrict localconstvolatileuchar16restrictp,\n"
+    "                                                     local const volatile short16 *restrict localconstvolatileshort16restrictp,\n"
+    "                                                     local const volatile ushort16* restrict localconstvolatileushort16restrictp,\n"
+    "                                                     local const volatile int16 * restrict localconstvolatileint16restrictp,\n"
+    "                                                     local const volatile uint16*restrict localconstvolatileuint16restrictp,\n"
+    "                                                     local const volatile float16 * restrict localconstvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector16_d(char16 char16d,\n"
+    "                       uchar16 uchar16d,\n"
+    "                       short16 short16d,\n"
+    "                       ushort16 ushort16d,\n"
+    "                       int16 int16d,\n"
+    "                       uint16 uint16d,\n"
+    "                       float16 float16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector16_d(const char16 constchar16d,\n"
+    "                             const uchar16 constuchar16d,\n"
+    "                             const short16 constshort16d,\n"
+    "                             const ushort16 constushort16d,\n"
+    "                             const int16 constint16d,\n"
+    "                             const uint16 constuint16d,\n"
+    "                             const float16 constfloat16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector16_d(private char16 privatechar16d,\n"
+    "                               private uchar16 privateuchar16d,\n"
+    "                               private short16 privateshort16d,\n"
+    "                               private ushort16 privateushort16d,\n"
+    "                               private int16 privateint16d,\n"
+    "                               private uint16 privateuint16d,\n"
+    "                               private float16 privatefloat16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector16_d(private const char16 privateconstchar16d,\n"
+    "                                     private const uchar16 privateconstuchar16d,\n"
+    "                                     private const short16 privateconstshort16d,\n"
+    "                                     private const ushort16 privateconstushort16d,\n"
+    "                                     private const int16 privateconstint16d,\n"
+    "                                     private const uint16 privateconstuint16d,\n"
+    "                                     private const float16 privateconstfloat16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_p0(constant typedef_type*constanttypedef_typep,\n"
+    "                               constant struct struct_type *constantstructstruct_typep,\n"
+    "                               constant typedef_struct_type* constanttypedef_struct_typep,\n"
+    "                               constant union union_type * constantunionunion_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_p1(constant typedef_union_type*constanttypedef_union_typep,\n"
+    "                               constant enum enum_type *constantenumenum_typep,\n"
+    "                               constant typedef_enum_type* constanttypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_restrict_p0(constant typedef_type * restrict constanttypedef_typerestrictp,\n"
+    "                                        constant struct struct_type*restrict constantstructstruct_typerestrictp,\n"
+    "                                        constant typedef_struct_type *restrict constanttypedef_struct_typerestrictp,\n"
+    "                                        constant union union_type* restrict constantunionunion_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_restrict_p1(constant typedef_union_type * restrict constanttypedef_union_typerestrictp,\n"
+    "                                        constant enum enum_type*restrict constantenumenum_typerestrictp,\n"
+    "                                        constant typedef_enum_type *restrict constanttypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_derived_p(global typedef_type*globaltypedef_typep,\n"
+    "                             global struct struct_type *globalstructstruct_typep,\n"
+    "                             global typedef_struct_type* globaltypedef_struct_typep,\n"
+    "                             global union union_type * globalunionunion_typep,\n"
+    "                             global typedef_union_type*globaltypedef_union_typep,\n"
+    "                             global enum enum_type *globalenumenum_typep,\n"
+    "                             global typedef_enum_type* globaltypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_derived_restrict_p(global typedef_type * restrict globaltypedef_typerestrictp,\n"
+    "                                      global struct struct_type*restrict globalstructstruct_typerestrictp,\n"
+    "                                      global typedef_struct_type *restrict globaltypedef_struct_typerestrictp,\n"
+    "                                      global union union_type* restrict globalunionunion_typerestrictp,\n"
+    "                                      global typedef_union_type * restrict globaltypedef_union_typerestrictp,\n"
+    "                                      global enum enum_type*restrict globalenumenum_typerestrictp,\n"
+    "                                      global typedef_enum_type *restrict globaltypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_derived_p(global const typedef_type* globalconsttypedef_typep,\n"
+    "                                   global const struct struct_type * globalconststructstruct_typep,\n"
+    "                                   global const typedef_struct_type*globalconsttypedef_struct_typep,\n"
+    "                                   global const union union_type *globalconstunionunion_typep,\n"
+    "                                   global const typedef_union_type* globalconsttypedef_union_typep,\n"
+    "                                   global const enum enum_type * globalconstenumenum_typep,\n"
+    "                                   global const typedef_enum_type*globalconsttypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_derived_restrict_p(global const typedef_type *restrict globalconsttypedef_typerestrictp,\n"
+    "                                            global const struct struct_type* restrict globalconststructstruct_typerestrictp,\n"
+    "                                            global const typedef_struct_type * restrict globalconsttypedef_struct_typerestrictp,\n"
+    "                                            global const union union_type*restrict globalconstunionunion_typerestrictp,\n"
+    "                                            global const typedef_union_type *restrict globalconsttypedef_union_typerestrictp,\n"
+    "                                            global const enum enum_type* restrict globalconstenumenum_typerestrictp,\n"
+    "                                            global const typedef_enum_type * restrict globalconsttypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_derived_p(global volatile typedef_type*globalvolatiletypedef_typep,\n"
+    "                                      global volatile struct struct_type *globalvolatilestructstruct_typep,\n"
+    "                                      global volatile typedef_struct_type* globalvolatiletypedef_struct_typep,\n"
+    "                                      global volatile union union_type * globalvolatileunionunion_typep,\n"
+    "                                      global volatile typedef_union_type*globalvolatiletypedef_union_typep,\n"
+    "                                      global volatile enum enum_type *globalvolatileenumenum_typep,\n"
+    "                                      global volatile typedef_enum_type* globalvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_derived_restrict_p(global volatile typedef_type * restrict globalvolatiletypedef_typerestrictp,\n"
+    "                                               global volatile struct struct_type*restrict globalvolatilestructstruct_typerestrictp,\n"
+    "                                               global volatile typedef_struct_type *restrict globalvolatiletypedef_struct_typerestrictp,\n"
+    "                                               global volatile union union_type* restrict globalvolatileunionunion_typerestrictp,\n"
+    "                                               global volatile typedef_union_type * restrict globalvolatiletypedef_union_typerestrictp,\n"
+    "                                               global volatile enum enum_type*restrict globalvolatileenumenum_typerestrictp,\n"
+    "                                               global volatile typedef_enum_type *restrict globalvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_derived_p(global const volatile typedef_type* globalconstvolatiletypedef_typep,\n"
+    "                                            global const volatile struct struct_type * globalconstvolatilestructstruct_typep,\n"
+    "                                            global const volatile typedef_struct_type*globalconstvolatiletypedef_struct_typep,\n"
+    "                                            global const volatile union union_type *globalconstvolatileunionunion_typep,\n"
+    "                                            global const volatile typedef_union_type* globalconstvolatiletypedef_union_typep,\n"
+    "                                            global const volatile enum enum_type * globalconstvolatileenumenum_typep,\n"
+    "                                            global const volatile typedef_enum_type*globalconstvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_derived_restrict_p(global const volatile typedef_type *restrict globalconstvolatiletypedef_typerestrictp,\n"
+    "                                                     global const volatile struct struct_type* restrict globalconstvolatilestructstruct_typerestrictp,\n"
+    "                                                     global const volatile typedef_struct_type * restrict globalconstvolatiletypedef_struct_typerestrictp,\n"
+    "                                                     global const volatile union union_type*restrict globalconstvolatileunionunion_typerestrictp,\n"
+    "                                                     global const volatile typedef_union_type *restrict globalconstvolatiletypedef_union_typerestrictp,\n"
+    "                                                     global const volatile enum enum_type* restrict globalconstvolatileenumenum_typerestrictp,\n"
+    "                                                     global const volatile typedef_enum_type * restrict globalconstvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_derived_p(local typedef_type*localtypedef_typep,\n"
+    "                            local struct struct_type *localstructstruct_typep,\n"
+    "                            local typedef_struct_type* localtypedef_struct_typep,\n"
+    "                            local union union_type * localunionunion_typep,\n"
+    "                            local typedef_union_type*localtypedef_union_typep,\n"
+    "                            local enum enum_type *localenumenum_typep,\n"
+    "                            local typedef_enum_type* localtypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_derived_restrict_p(local typedef_type * restrict localtypedef_typerestrictp,\n"
+    "                                     local struct struct_type*restrict localstructstruct_typerestrictp,\n"
+    "                                     local typedef_struct_type *restrict localtypedef_struct_typerestrictp,\n"
+    "                                     local union union_type* restrict localunionunion_typerestrictp,\n"
+    "                                     local typedef_union_type * restrict localtypedef_union_typerestrictp,\n"
+    "                                     local enum enum_type*restrict localenumenum_typerestrictp,\n"
+    "                                     local typedef_enum_type *restrict localtypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_derived_p(local const typedef_type* localconsttypedef_typep,\n"
+    "                                  local const struct struct_type * localconststructstruct_typep,\n"
+    "                                  local const typedef_struct_type*localconsttypedef_struct_typep,\n"
+    "                                  local const union union_type *localconstunionunion_typep,\n"
+    "                                  local const typedef_union_type* localconsttypedef_union_typep,\n"
+    "                                  local const enum enum_type * localconstenumenum_typep,\n"
+    "                                  local const typedef_enum_type*localconsttypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_derived_restrict_p(local const typedef_type *restrict localconsttypedef_typerestrictp,\n"
+    "                                           local const struct struct_type* restrict localconststructstruct_typerestrictp,\n"
+    "                                           local const typedef_struct_type * restrict localconsttypedef_struct_typerestrictp,\n"
+    "                                           local const union union_type*restrict localconstunionunion_typerestrictp,\n"
+    "                                           local const typedef_union_type *restrict localconsttypedef_union_typerestrictp,\n"
+    "                                           local const enum enum_type* restrict localconstenumenum_typerestrictp,\n"
+    "                                           local const typedef_enum_type * restrict localconsttypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_derived_p(local volatile typedef_type*localvolatiletypedef_typep,\n"
+    "                                     local volatile struct struct_type *localvolatilestructstruct_typep,\n"
+    "                                     local volatile typedef_struct_type* localvolatiletypedef_struct_typep,\n"
+    "                                     local volatile union union_type * localvolatileunionunion_typep,\n"
+    "                                     local volatile typedef_union_type*localvolatiletypedef_union_typep,\n"
+    "                                     local volatile enum enum_type *localvolatileenumenum_typep,\n"
+    "                                     local volatile typedef_enum_type* localvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_derived_restrict_p(local volatile typedef_type * restrict localvolatiletypedef_typerestrictp,\n"
+    "                                              local volatile struct struct_type*restrict localvolatilestructstruct_typerestrictp,\n"
+    "                                              local volatile typedef_struct_type *restrict localvolatiletypedef_struct_typerestrictp,\n"
+    "                                              local volatile union union_type* restrict localvolatileunionunion_typerestrictp,\n"
+    "                                              local volatile typedef_union_type * restrict localvolatiletypedef_union_typerestrictp,\n"
+    "                                              local volatile enum enum_type*restrict localvolatileenumenum_typerestrictp,\n"
+    "                                              local volatile typedef_enum_type *restrict localvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_derived_p(local const volatile typedef_type* localconstvolatiletypedef_typep,\n"
+    "                                           local const volatile struct struct_type * localconstvolatilestructstruct_typep,\n"
+    "                                           local const volatile typedef_struct_type*localconstvolatiletypedef_struct_typep,\n"
+    "                                           local const volatile union union_type *localconstvolatileunionunion_typep,\n"
+    "                                           local const volatile typedef_union_type* localconstvolatiletypedef_union_typep,\n"
+    "                                           local const volatile enum enum_type * localconstvolatileenumenum_typep,\n"
+    "                                           local const volatile typedef_enum_type*localconstvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_derived_restrict_p(local const volatile typedef_type *restrict localconstvolatiletypedef_typerestrictp,\n"
+    "                                                    local const volatile struct struct_type* restrict localconstvolatilestructstruct_typerestrictp,\n"
+    "                                                    local const volatile typedef_struct_type * restrict localconstvolatiletypedef_struct_typerestrictp,\n"
+    "                                                    local const volatile union union_type*restrict localconstvolatileunionunion_typerestrictp,\n"
+    "                                                    local const volatile typedef_union_type *restrict localconstvolatiletypedef_union_typerestrictp,\n"
+    "                                                    local const volatile enum enum_type* restrict localconstvolatileenumenum_typerestrictp,\n"
+    "                                                    local const volatile typedef_enum_type * restrict localconstvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void derived_d(typedef_type typedef_typed,\n"
+    "                      struct struct_type structstruct_typed,\n"
+    "                      typedef_struct_type typedef_struct_typed,\n"
+    "                      union union_type unionunion_typed,\n"
+    "                      typedef_union_type typedef_union_typed,\n"
+    "                      enum enum_type enumenum_typed,\n"
+    "                      typedef_enum_type typedef_enum_typed)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_derived_d(const typedef_type consttypedef_typed,\n"
+    "                            const struct struct_type conststructstruct_typed,\n"
+    "                            const typedef_struct_type consttypedef_struct_typed,\n"
+    "                            const union union_type constunionunion_typed,\n"
+    "                            const typedef_union_type consttypedef_union_typed,\n"
+    "                            const enum enum_type constenumenum_typed,\n"
+    "                            const typedef_enum_type consttypedef_enum_typed)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_derived_d(private typedef_type privatetypedef_typed,\n"
+    "                              private struct struct_type privatestructstruct_typed,\n"
+    "                              private typedef_struct_type privatetypedef_struct_typed,\n"
+    "                              private union union_type privateunionunion_typed,\n"
+    "                              private typedef_union_type privatetypedef_union_typed,\n"
+    "                              private enum enum_type privateenumenum_typed,\n"
+    "                              private typedef_enum_type privatetypedef_enum_typed)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_derived_d(private const typedef_type privateconsttypedef_typed,\n"
+    "                                    private const struct struct_type privateconststructstruct_typed,\n"
+    "                                    private const typedef_struct_type privateconsttypedef_struct_typed,\n"
+    "                                    private const union union_type privateconstunionunion_typed,\n"
+    "                                    private const typedef_union_type privateconsttypedef_union_typed,\n"
+    "                                    private const enum enum_type privateconstenumenum_typed,\n"
+    "                                    private const typedef_enum_type privateconsttypedef_enum_typed)\n"
+    "{}\n",
+    "\n"
-static const std::vector<std::string> image_arguments = {
-    "image2d_t", "image3d_t",        "image2d_array_t",
-    "image1d_t", "image1d_buffer_t", "image1d_array_t"
+const char * required_arg_info[][72] = {
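+    // CL_DEVICE_MAX_CONSTANT_ARGS may be as low as 4 (the embedded-profile minimum), so the constant-pointer kernels are split into groups of at most 4 arguments.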
+    {
+        "constant_scalar_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "constantvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "constantcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantunsignedcharp",
+        NULL
+  },
+  {
+        "constant_scalar_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "constantshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "constantintp",
+        NULL
+  },
+  {
+        "constant_scalar_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantunsignedintp",
+        NULL
+  },
+  {
+        "constant_scalar_p3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "constantfloatp",
+        NULL
+    },
+    {
+        "constant_scalar_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "constantvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "constantcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantunsignedcharrestrictp",
+        NULL
+  },
+  {
+        "constant_scalar_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "constantshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "constantushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "constantunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "constantintrestrictp",
+        NULL
+  },
+  {
+        "constant_scalar_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantunsignedintrestrictp",
+        NULL
+  },
+  {
+        "constant_scalar_restrict_p3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "constantfloatrestrictp",
+        NULL
+    },
+    {
+        "global_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "globalvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char*", "globalcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "globalshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "globalintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globaluintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globalunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "globalfloatp",
+        NULL
+    },
+    {
+        "global_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globaluintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalfloatrestrictp",
+        NULL
+    },
+    {
+        "global_const_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "globalconstvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "globalconstcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "globalconstshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "globalconstushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "globalconstunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "globalconstintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "globalconstfloatp",
+        NULL
+    },
+    {
+        "global_const_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalconstshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalconstfloatrestrictp",
+        NULL
+    },
+    {
+        "global_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalvolatilefloatp",
+        NULL
+    },
+    {
+        "global_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalvolatilefloatrestrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalconstvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalconstvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalconstvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalconstvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalconstvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalconstvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalconstvolatilefloatp",
+        NULL
+    },
+    {
+        "global_const_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalconstvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalconstvolatilefloatrestrictp",
+        NULL
+    },
+    {
+        "local_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "localvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char*", "localcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "localshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "localintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "localfloatp",
+        NULL
+    },
+    {
+        "local_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localfloatrestrictp",
+        NULL
+    },
+    {
+        "local_const_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "localconstvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "localconstcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "localconstshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "localconstintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "localconstfloatp",
+        NULL
+    },
+    {
+        "local_const_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localconstintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localconstfloatrestrictp",
+        NULL
+    },
+    {
+        "local_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localvolatilefloatp",
+        NULL
+    },
+    {
+        "local_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localvolatilefloatrestrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localconstvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localconstvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localconstvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localconstvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localconstvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localconstvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localconstvolatilefloatp",
+        NULL
+    },
+    {
+        "local_const_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localconstvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localconstvolatilefloatrestrictp",
+        NULL
+    },
+    {
+        "scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "chard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "uchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "unsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "shortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "ushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "unsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "intd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "uintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "unsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "floatd",
+        NULL
+    },
+    {
+        "const_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "constchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constuchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constunsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "constshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constunsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "constintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constuintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constunsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "constfloatd",
+        NULL
+    },
+    {
+        "private_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "privatechard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateuchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateunsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateunsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateuintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateunsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privatefloatd",
+        NULL
+    },
+    {
+        "private_const_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "privateconstchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstuchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstunsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateconstshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstunsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateconstintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstuintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstunsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privateconstfloatd",
+        NULL
+    },
+    {
+        "constant_vector2_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "constantchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "constantuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "constantshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "constantushort2p",
+        NULL
+    },
+    {
+        "constant_vector2_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "constantint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "constantuint2p",
+        NULL
+    },
+    {
+        "constant_vector2_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "constantfloat2p",
+        NULL
+    },
+    {
+        "constant_vector2_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "constantchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "constantuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "constantshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "constantushort2restrictp",
+        NULL
+    },
+    {
+        "constant_vector2_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "constantint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "constantuint2restrictp",
+        NULL
+    },
+    {
+        "constant_vector2_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "constantfloat2restrictp",
+        NULL
+    },
+    {
+        "global_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "globalchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "globaluchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "globalshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "globalushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2*", "globalint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "globaluint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "globalfloat2p",
+        NULL
+    },
+    {
+        "global_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globaluchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globaluint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalfloat2restrictp",
+        NULL
+    },
+    {
+        "global_const_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "globalconstchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "globalconstuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "globalconstshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "globalconstushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "globalconstint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "globalconstuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "globalconstfloat2p",
+        NULL
+    },
+    {
+        "global_const_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalconstushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstfloat2restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "globalvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "globalvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalvolatilefloat2p",
+        NULL
+    },
+    {
+        "global_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalconstvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "globalconstvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalconstvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "globalconstvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalconstvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalconstvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalconstvolatilefloat2p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalconstvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "local_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "localchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "localuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "localshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "localushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2*", "localint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "localuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "localfloat2p",
+        NULL
+    },
+    {
+        "local_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localfloat2restrictp",
+        NULL
+    },
+    {
+        "local_const_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "localconstchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "localconstuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "localconstshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "localconstushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "localconstint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "localconstuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "localconstfloat2p",
+        NULL
+    },
+    {
+        "local_const_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localconstchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstfloat2restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "localvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "localvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localvolatilefloat2p",
+        NULL
+    },
+    {
+        "local_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localconstvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "localconstvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "localconstvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localconstvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localconstvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localconstvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localconstvolatilefloat2p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localconstvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "char2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "uchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "short2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "ushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "int2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "uint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "float2d",
+        NULL
+    },
+    {
+        "const_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "constchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "constuchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "constshort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "constushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "constint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "constuint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "constfloat2d",
+        NULL
+    },
+    {
+        "private_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privatechar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "privateuchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateshort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateuint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privatefloat2d",
+        NULL
+    },
+    {
+        "private_const_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privateconstchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "privateconstuchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateconstshort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateconstushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateconstint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateconstuint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privateconstfloat2d",
+        NULL
+    },
+    {
+        "constant_vector3_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "constantchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "constantuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "constantshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "constantushort3p",
+        NULL
+    },
+    {
+        "constant_vector3_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "constantint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "constantuint3p",
+        NULL
+    },
+    {
+        "constant_vector3_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "constantfloat3p",
+        NULL
+    },
+    {
+        "constant_vector3_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "constantchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "constantuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "constantshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "constantushort3restrictp",
+        NULL
+    },
+    {
+        "constant_vector3_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "constantint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "constantuint3restrictp",
+        NULL
+    },
+    {
+        "constant_vector3_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "constantfloat3restrictp",
+        NULL
+    },
+    {
+        "global_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "globalchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "globaluchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "globalshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "globalushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "globalint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "globaluint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "globalfloat3p",
+        NULL
+    },
+    {
+        "global_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globaluchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globaluint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalfloat3restrictp",
+        NULL
+    },
+    {
+        "global_const_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "globalconstchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "globalconstuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "globalconstshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "globalconstushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "globalconstint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "globalconstuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "globalconstfloat3p",
+        NULL
+    },
+    {
+        "global_const_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalconstshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstfloat3restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "globalvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalvolatilefloat3p",
+        NULL
+    },
+    {
+        "global_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalconstvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalconstvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalconstvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalconstvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "globalconstvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalconstvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalconstvolatilefloat3p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalconstvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "local_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "localchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "localuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "localshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "localushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "localint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "localuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "localfloat3p",
+        NULL
+    },
+    {
+        "local_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localfloat3restrictp",
+        NULL
+    },
+    {
+        "local_const_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "localconstchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "localconstuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "localconstshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "localconstushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "localconstint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "localconstuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "localconstfloat3p",
+        NULL
+    },
+    {
+        "local_const_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localconstuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstfloat3restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "localvolatilefloat3p",
+        NULL
+    },
+    {
+        "local_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localconstvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localconstvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localconstvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localconstvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localconstvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localconstvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "localconstvolatilefloat3p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localconstvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "char3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "uchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "short3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "ushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "int3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "uint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "float3d",
+        NULL
+    },
+    {
+        "const_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "constchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "constuchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "constshort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "constushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "constint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "constuint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "constfloat3d",
+        NULL
+    },
+    {
+        "private_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privatechar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateuchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateshort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateuint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "privatefloat3d",
+        NULL
+    },
+    {
+        "private_const_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privateconstchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateconstuchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateconstshort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateconstushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateconstint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateconstuint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "privateconstfloat3d",
+        NULL
+    },
+    {
+        "constant_vector4_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "constantchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "constantuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "constantshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "constantushort4p",
+        NULL
+    },
+    {
+        "constant_vector4_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "constantint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "constantuint4p",
+        NULL
+    },
+    {
+        "constant_vector4_p2",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "constantfloat4p",
+        NULL
+    },
+    {
+        "constant_vector4_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "constantchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "constantuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "constantshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "constantushort4restrictp",
+        NULL
+    },
+    {
+        "constant_vector4_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "constantint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "constantuint4restrictp",
+        NULL
+    },
+    {
+        "constant_vector4_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "constantfloat4restrictp",
+        NULL
+    },
+    {
+        "global_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "globalchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "globaluchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "globalshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "globalushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "globalint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4*", "globaluint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "globalfloat4p",
+        NULL
+    },
+    {
+        "global_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globaluchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globaluint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalfloat4restrictp",
+        NULL
+    },
+    {
+        "global_const_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "globalconstchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "globalconstuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "globalconstshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "globalconstushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "globalconstint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "globalconstuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "globalconstfloat4p",
+        NULL
+    },
+    {
+        "global_const_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalconstchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalconstint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstfloat4restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalvolatilefloat4p",
+        NULL
+    },
+    {
+        "global_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalconstvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalconstvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalconstvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalconstvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalconstvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalconstvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalconstvolatilefloat4p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalconstvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalconstvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "local_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "localchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "localuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "localshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "localushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "localint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4*", "localuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "localfloat4p",
+        NULL
+    },
+    {
+        "local_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localfloat4restrictp",
+        NULL
+    },
+    {
+        "local_const_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "localconstchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "localconstuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "localconstshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "localconstushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "localconstint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "localconstuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "localconstfloat4p",
+        NULL
+    },
+    {
+        "local_const_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localconstuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstfloat4restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "localvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localvolatilefloat4p",
+        NULL
+    },
+    {
+        "local_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localconstvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localconstvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localconstvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "localconstvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localconstvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localconstvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localconstvolatilefloat4p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localconstvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "char4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "uchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "short4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "ushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "int4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "uint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "float4d",
+        NULL
+    },
+    {
+        "const_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "constchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "constuchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "constshort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "constushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "constint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "constuint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "constfloat4d",
+        NULL
+    },
+    {
+        "private_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privatechar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateuchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateshort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateuint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privatefloat4d",
+        NULL
+    },
+    {
+        "private_const_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privateconstchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateconstuchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateconstshort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateconstushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateconstint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateconstuint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privateconstfloat4d",
+        NULL
+    },
+    {
+        "constant_vector8_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "constantchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "constantuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "constantshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "constantushort8p",
+        NULL
+    },
+    {
+        "constant_vector8_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "constantint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "constantuint8p",
+    NULL
+    },
+    {
+        "constant_vector8_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "constantfloat8p",
+        NULL
+    },
+    {
+        "constant_vector8_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "constantchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "constantuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "constantshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "constantushort8restrictp",
+        NULL
+    },
+    {
+        "constant_vector8_restrict_p1", /* presumably the kernel name; each row below holds three CL_KERNEL_ARG_* values and two strings -- confirm against the code that reads this table */
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "constantint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "constantuint8restrictp",
+        NULL
+    },
+    {
+        "constant_vector8_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "constantfloat8restrictp",
+        NULL
+    },
+    {
+        "global_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "globalchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8*", "globaluchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "globalshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "globalushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "globalint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "globaluint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "globalfloat8p",
+        NULL
+    },
+    {
+        "global_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globaluchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globaluint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalfloat8restrictp",
+        NULL
+    },
+    {
+        "global_const_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "globalconstchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "globalconstuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "globalconstshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "globalconstushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "globalconstint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "globalconstuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "globalconstfloat8p",
+        NULL
+    },
+    {
+        "global_const_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalconstchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalconstuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalconstushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstfloat8restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "globalvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalvolatilefloat8p",
+        NULL
+    },
+    {
+        "global_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalconstvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "globalconstvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalconstvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalconstvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalconstvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalconstvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalconstvolatilefloat8p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalconstvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalconstvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalconstvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "local_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "localchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8*", "localuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "localshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "localushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "localint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "localuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "localfloat8p",
+        NULL
+    },
+    {
+        "local_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localfloat8restrictp",
+        NULL
+    },
+    {
+        "local_const_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "localconstchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "localconstuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "localconstshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "localconstushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "localconstint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "localconstuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "localconstfloat8p",
+        NULL
+    },
+    {
+        "local_const_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localconstchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstfloat8restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "localvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localvolatilefloat8p",
+        NULL
+    },
+    {
+        "local_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localconstvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "localconstvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localconstvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localconstvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localconstvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localconstvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localconstvolatilefloat8p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localconstvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "char8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "uchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "short8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "ushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "int8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "uint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "float8d",
+        NULL
+    },
+    {
+        "const_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "constchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "constuchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "constshort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "constushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "constint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "constuint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "constfloat8d",
+        NULL
+    },
+    {
+        "private_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privatechar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateuchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "privateshort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateuint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privatefloat8d",
+        NULL
+    },
+    {
+        "private_const_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privateconstchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateconstuchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "privateconstshort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateconstushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateconstint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateconstuint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privateconstfloat8d",
+        NULL
+    },
+    {
+        "constant_vector16_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "constantchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "constantuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "constantshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "constantushort16p",
+        NULL
+    },
+    {
+        "constant_vector16_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "constantint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "constantuint16p",
+        NULL
+    },
+    {
+        "constant_vector16_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "constantfloat16p",
+        NULL
+    },
+    {
+        "constant_vector16_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "constantchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "constantuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "constantshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "constantushort16restrictp",
+        NULL
+    },
+    {
+        "constant_vector16_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "constantint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "constantuint16restrictp",
+        NULL
+    },
+    {
+        "constant_vector16_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "constantfloat16restrictp",
+        NULL
+    },
+    {
+        "global_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "globalchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "globaluchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "globalshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "globalushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "globalint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "globaluint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "globalfloat16p",
+        NULL
+    },
+    {
+        "global_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globaluchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globaluint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalfloat16restrictp",
+        NULL
+    },
+    {
+        "global_const_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "globalconstchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "globalconstuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "globalconstshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "globalconstushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "globalconstint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "globalconstuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "globalconstfloat16p",
+        NULL
+    },
+    {
+        "global_const_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstfloat16restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "globalvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "globalvolatilefloat16p",
+        NULL
+    },
+    {
+        "global_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "globalconstvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalconstvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalconstvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalconstvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalconstvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalconstvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "globalconstvolatilefloat16p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "local_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "localchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "localuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "localshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "localushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "localint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "localuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "localfloat16p",
+        NULL
+    },
+    {
+        "local_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localfloat16restrictp",
+        NULL
+    },
+    {
+        "local_const_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "localconstchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "localconstuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "localconstshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "localconstushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "localconstint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "localconstuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "localconstfloat16p",
+        NULL
+    },
+    {
+        "local_const_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstfloat16restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "localvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localvolatilefloat16p",
+        NULL
+    },
+    {
+        "local_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localconstvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localconstvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localconstvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localconstvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localconstvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "localconstvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localconstvolatilefloat16p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "char16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "uchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "short16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "ushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "int16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "uint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "float16d",
+        NULL
+    },
+    {
+        "const_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "constchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "constuchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "constshort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "constushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "constint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "constuint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "constfloat16d",
+        NULL
+    },
+    {
+        "private_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privatechar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateuchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateshort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateuint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privatefloat16d",
+        NULL
+    },
+    {
+        "private_const_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privateconstchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateconstuchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateconstshort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateconstushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateconstint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateconstuint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privateconstfloat16d",
+        NULL
+    },
+    {
+        "constant_derived_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "constanttypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "constantstructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "constanttypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "constantunionunion_typep",
+        NULL
+    },
+    {
+        "constant_derived_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "constanttypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "constantenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "constanttypedef_enum_typep",
+        NULL
+    },
+    {
+        "constant_derived_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "constanttypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "constantstructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "constanttypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "constantunionunion_typerestrictp",
+        NULL
+    },
+    {
+        "constant_derived_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "constanttypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "constantenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "constanttypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "globaltypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "globalstructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "globaltypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type*", "globalunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "globaltypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "globalenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "globaltypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globaltypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalstructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globaltypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globaltypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globaltypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_const_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "globalconsttypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "globalconststructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "globalconsttypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "globalconstunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "globalconsttypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "globalconstenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "globalconsttypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_const_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconsttypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconststructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconsttypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalconsttypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconsttypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "globalvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalconstvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalconstvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalconstvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalconstvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "globalconstvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_const_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconstvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconstvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalconstvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "localtypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "localstructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "localtypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type*", "localunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "localtypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "localenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "localtypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localtypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localstructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localtypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localtypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localtypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_const_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "localconsttypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "localconststructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "localconsttypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "localconstunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "localconsttypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "localconstenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "localconsttypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_const_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconsttypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconststructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconsttypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localconstunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconsttypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localconsttypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "localvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "localconstvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localconstvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localconstvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localconstvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localconstvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localconstvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localconstvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_const_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconstvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconstvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconstvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localconstvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconstvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localconstvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "typedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "structstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "typedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "unionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "typedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "enumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "typedef_enum_typed",
+        NULL
+    },
+    {
+        "const_derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "consttypedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "conststructstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "consttypedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "constunionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "consttypedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "constenumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "consttypedef_enum_typed",
+        NULL
+    },
+    {
+        "private_derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privatetypedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privatestructstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privatetypedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateunionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privatetypedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateenumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privatetypedef_enum_typed",
+        NULL
+    },
+    {
+        "private_const_derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privateconsttypedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privateconststructstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privateconsttypedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateconstunionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privateconsttypedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateconstenumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privateconsttypedef_enum_typed",
+        NULL
+    },
-static const std::vector<cl_kernel_arg_access_qualifier> access_qualifiers = {
+// Support for optional image data type: OpenCL C source text for a kernel whose parameters are read_only/write_only image types plus a sampler.
+const char * image_kernel_args[] = { // each element is one string built from adjacent literals
+    "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable \n" // presumably required for the write_only image3d_t parameter below - confirm
+    "kernel void image_d(read_only image2d_t image2d_td0,\n" // parameter names are the type name followed by "d" and the parameter index
+    "                    write_only image2d_t image2d_td1,\n"
+    "                    read_only image3d_t image3d_td2,\n"
+    "                    write_only image3d_t image3d_td3,\n"
+    "                    read_only image2d_array_t image2d_array_td4,\n"
+    "                    write_only image2d_array_t image2d_array_td5,\n"
+    "                    read_only image1d_t image1d_td6,\n"
+    "                    write_only image1d_t image1d_td7,\n"
+    "                    read_only image1d_buffer_t image1d_buffer_td8,\n"
+    "                    write_only image1d_buffer_t image1d_buffer_td9,\n"
+    "                    read_only image1d_array_t image1d_array_td10,\n"
+    "                    write_only image1d_array_t image1d_array_td11,\n"
+    "                    sampler_t sampler_td12)\n" // the only non-image parameter
+    "{}\n", // empty kernel body; the comma closes the first array element
+    "\n" // start of the next array element
-static const std::vector<cl_kernel_arg_type_qualifier> type_qualifiers = {
+const char * image_arg_info[][67] = { // 67 entries per row: kernel name + 13 arguments x 5 fields + trailing NULL
+    {
+        "image_d", // kernel name; the groups below are listed in the same order as the image_d parameters in image_kernel_args
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td0", // fields per argument: address qualifier, access qualifier, type qualifier (enum values cast to const char *), type name, argument name
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td4",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td5",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td6",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td7",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td8",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td9",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td10",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td11",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "sampler_t", "sampler_td12", // sampler entry: private address qualifier and no access qualifier, unlike the image entries
+        NULL // last entry of the row; presumably an end-of-arguments sentinel for the code that walks it - confirm against the reader
+    },
-static const std::vector<cl_kernel_arg_type_qualifier> pipe_qualifiers = {
+// Support for optional double data type
+const char * double_kernel_args[] = {
+    "kernel void double_scalar_p(constant double*constantdoublep,\n"
+    "                            constant double *restrict constantdoublerestrictp,\n"
+    "                            global double*globaldoublep,\n"
+    "                            global double *restrict globaldoublerestrictp,\n"
+    "                            global const double* globalconstdoublep,\n"
+    "                            global const double * restrict globalconstdoublerestrictp,\n"
+    "                            global volatile double*globalvolatiledoublep,\n"
+    "                            global volatile double *restrict globalvolatiledoublerestrictp,\n"
+    "                            global const volatile double* globalconstvolatiledoublep)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_scalar_p2(global const volatile double * restrict globalconstvolatiledoublerestrictp,\n"
+    "                             local double*localdoublep,\n"
+    "                             local double *restrict localdoublerestrictp,\n"
+    "                             local const double* localconstdoublep,\n"
+    "                             local const double * restrict localconstdoublerestrictp,\n"
+    "                             local volatile double*localvolatiledoublep,\n"
+    "                             local volatile double *restrict localvolatiledoublerestrictp,\n"
+    "                             local const volatile double* localconstvolatiledoublep,\n"
+    "                             local const volatile double * restrict localconstvolatiledoublerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_scalar_d(double doubled,\n"
+    "                            const double constdoubled,\n"
+    "                            private double privatedoubled,\n"
+    "                            private const double privateconstdoubled)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector2_p(constant double2*constantdouble2p,\n"
+    "                             constant double2 *restrict constantdouble2restrictp,\n"
+    "                             global double2*globaldouble2p,\n"
+    "                             global double2 *restrict globaldouble2restrictp,\n"
+    "                             global const double2* globalconstdouble2p,\n"
+    "                             global const double2 * restrict globalconstdouble2restrictp,\n"
+    "                             global volatile double2*globalvolatiledouble2p,\n"
+    "                             global volatile double2 *restrict globalvolatiledouble2restrictp,\n"
+    "                             global const volatile double2* globalconstvolatiledouble2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector2_p2(global const volatile double2 * restrict globalconstvolatiledouble2restrictp,\n"
+    "                              local double2*localdouble2p,\n"
+    "                              local double2 *restrict localdouble2restrictp,\n"
+    "                              local const double2* localconstdouble2p,\n"
+    "                              local const double2 * restrict localconstdouble2restrictp,\n"
+    "                              local volatile double2*localvolatiledouble2p,\n"
+    "                              local volatile double2 *restrict localvolatiledouble2restrictp,\n"
+    "                              local const volatile double2* localconstvolatiledouble2p,\n"
+    "                              local const volatile double2 * restrict localconstvolatiledouble2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector2_d(double2 double2d,\n"
+    "                             const double2 constdouble2d,\n"
+    "                             private double2 privatedouble2d,\n"
+    "                             private const double2 privateconstdouble2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector3_p(constant double3*constantdouble3p,\n"
+    "                             constant double3 *restrict constantdouble3restrictp,\n"
+    "                             global double3*globaldouble3p,\n"
+    "                             global double3 *restrict globaldouble3restrictp,\n"
+    "                             global const double3* globalconstdouble3p,\n"
+    "                             global const double3 * restrict globalconstdouble3restrictp,\n"
+    "                             global volatile double3*globalvolatiledouble3p,\n"
+    "                             global volatile double3 *restrict globalvolatiledouble3restrictp,\n"
+    "                             global const volatile double3* globalconstvolatiledouble3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector3_p2(global const volatile double3 * restrict globalconstvolatiledouble3restrictp,\n"
+    "                              local double3*localdouble3p,\n"
+    "                              local double3 *restrict localdouble3restrictp,\n"
+    "                              local const double3* localconstdouble3p,\n"
+    "                              local const double3 * restrict localconstdouble3restrictp,\n"
+    "                              local volatile double3*localvolatiledouble3p,\n"
+    "                              local volatile double3 *restrict localvolatiledouble3restrictp,\n"
+    "                              local const volatile double3* localconstvolatiledouble3p,\n"
+    "                              local const volatile double3 * restrict localconstvolatiledouble3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector3_d(double3 double3d,\n"
+    "                             const double3 constdouble3d,\n"
+    "                             private double3 privatedouble3d,\n"
+    "                             private const double3 privateconstdouble3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector4_p(constant double4*constantdouble4p,\n"
+    "                             constant double4 *restrict constantdouble4restrictp,\n"
+    "                             global double4*globaldouble4p,\n"
+    "                             global double4 *restrict globaldouble4restrictp,\n"
+    "                             global const double4* globalconstdouble4p,\n"
+    "                             global const double4 * restrict globalconstdouble4restrictp,\n"
+    "                             global volatile double4*globalvolatiledouble4p,\n"
+    "                             global volatile double4 *restrict globalvolatiledouble4restrictp,\n"
+    "                             global const volatile double4* globalconstvolatiledouble4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector4_p2(global const volatile double4 * restrict globalconstvolatiledouble4restrictp,\n"
+    "                              local double4*localdouble4p,\n"
+    "                              local double4 *restrict localdouble4restrictp,\n"
+    "                              local const double4* localconstdouble4p,\n"
+    "                              local const double4 * restrict localconstdouble4restrictp,\n"
+    "                              local volatile double4*localvolatiledouble4p,\n"
+    "                              local volatile double4 *restrict localvolatiledouble4restrictp,\n"
+    "                              local const volatile double4* localconstvolatiledouble4p,\n"
+    "                              local const volatile double4 * restrict localconstvolatiledouble4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector4_d(double4 double4d,\n"
+    "                             const double4 constdouble4d,\n"
+    "                             private double4 privatedouble4d,\n"
+    "                             private const double4 privateconstdouble4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector8_p(constant double8*constantdouble8p,\n"
+    "                             constant double8 *restrict constantdouble8restrictp,\n"
+    "                             global double8*globaldouble8p,\n"
+    "                             global double8 *restrict globaldouble8restrictp,\n"
+    "                             global const double8* globalconstdouble8p,\n"
+    "                             global const double8 * restrict globalconstdouble8restrictp,\n"
+    "                             global volatile double8*globalvolatiledouble8p,\n"
+    "                             global volatile double8 *restrict globalvolatiledouble8restrictp,\n"
+    "                             global const volatile double8* globalconstvolatiledouble8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector8_p2(global const volatile double8 * restrict globalconstvolatiledouble8restrictp,\n"
+    "                              local double8*localdouble8p,\n"
+    "                              local double8 *restrict localdouble8restrictp,\n"
+    "                              local const double8* localconstdouble8p,\n"
+    "                              local const double8 * restrict localconstdouble8restrictp,\n"
+    "                              local volatile double8*localvolatiledouble8p,\n"
+    "                              local volatile double8 *restrict localvolatiledouble8restrictp,\n"
+    "                              local const volatile double8* localconstvolatiledouble8p,\n"
+    "                              local const volatile double8 * restrict localconstvolatiledouble8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector8_d(double8 double8d,\n"
+    "                             const double8 constdouble8d,\n"
+    "                             private double8 privatedouble8d,\n"
+    "                             private const double8 privateconstdouble8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector16_p(constant double16*constantdouble16p,\n"
+    "                              constant double16 *restrict constantdouble16restrictp,\n"
+    "                              global double16*globaldouble16p,\n"
+    "                              global double16 *restrict globaldouble16restrictp,\n"
+    "                              global const double16* globalconstdouble16p,\n"
+    "                              global const double16 * restrict globalconstdouble16restrictp,\n"
+    "                              global volatile double16*globalvolatiledouble16p,\n"
+    "                              global volatile double16 *restrict globalvolatiledouble16restrictp,\n"
+    "                              global const volatile double16* globalconstvolatiledouble16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector16_p2(global const volatile double16 * restrict globalconstvolatiledouble16restrictp,\n"
+    "                               local double16*localdouble16p,\n"
+    "                               local double16 *restrict localdouble16restrictp,\n"
+    "                               local const double16* localconstdouble16p,\n"
+    "                               local const double16 * restrict localconstdouble16restrictp,\n"
+    "                               local volatile double16*localvolatiledouble16p,\n"
+    "                               local volatile double16 *restrict localvolatiledouble16restrictp,\n"
+    "                               local const volatile double16* localconstvolatiledouble16p,\n"
+    "                               local const volatile double16 * restrict localconstvolatiledouble16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector16_d(double16 double16d,\n"
+    "                              const double16 constdouble16d,\n"
+    "                              private double16 privatedouble16d,\n"
+    "                              private const double16 privateconstdouble16d)\n"
+    "{}\n",
+    "\n"
-static std::string
-get_address_qualifier(cl_kernel_arg_address_qualifier address_qualifier)
-    std::string ret;
-    if (address_qualifier == CL_KERNEL_ARG_ADDRESS_GLOBAL)
-        ret = "global";
-    else if (address_qualifier == CL_KERNEL_ARG_ADDRESS_CONSTANT)
-        ret = "constant";
-    else if (address_qualifier == CL_KERNEL_ARG_ADDRESS_LOCAL)
-        ret = "local";
-    else if (address_qualifier == CL_KERNEL_ARG_ADDRESS_PRIVATE)
-        ret = "private";
-    return ret;
-static std::string
-get_access_qualifier(cl_kernel_arg_access_qualifier qualifier)
-    std::string ret;
-    if (qualifier == CL_KERNEL_ARG_ACCESS_READ_ONLY) ret = "read_only";
-    if (qualifier == CL_KERNEL_ARG_ACCESS_WRITE_ONLY) ret = "write_only";
-    if (qualifier == CL_KERNEL_ARG_ACCESS_READ_WRITE) ret = "read_write";
-    return ret;
-static std::string
-get_type_qualifier_prefix(cl_kernel_arg_type_qualifier type_qualifier)
-    std::string ret;
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_CONST) ret += "const ";
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_VOLATILE) ret += "volatile ";
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_PIPE) ret += "pipe ";
-    return ret;
-static std::string
-get_type_qualifier_postfix(cl_kernel_arg_type_qualifier type_qualifier)
-    std::string ret;
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_RESTRICT) ret = "restrict";
-    return ret;
-class KernelArgInfo {
-    KernelArgInfo(cl_kernel_arg_address_qualifier input_address_qualifier,
-                  cl_kernel_arg_access_qualifier input_access_qualifier,
-                  cl_kernel_arg_type_qualifier input_type_qualifier,
-                  const std::string& input_arg_type, const int argument_number,
-                  const std::string& input_arg_string = "")
-        : address_qualifier(input_address_qualifier),
-          access_qualifier(input_access_qualifier),
-          type_qualifier(input_type_qualifier), arg_string(input_arg_string)
-    {
-        strcpy(arg_type, input_arg_type.c_str());
-        std::string input_arg_name =
-            KERNEL_ARGUMENT_NAME + std::to_string(argument_number);
-        strcpy(arg_name, input_arg_name.c_str());
-    };
-    KernelArgInfo() = default;
-    cl_kernel_arg_address_qualifier address_qualifier;
-    cl_kernel_arg_access_qualifier access_qualifier;
-    cl_kernel_arg_type_qualifier type_qualifier;
-    char arg_type[KERNEL_ARGUMENT_LENGTH];
-    char arg_name[KERNEL_ARGUMENT_LENGTH];
-    std::string arg_string;
+// Support for optional 3D image data type
+const char * image_3D_kernel_args[] = {
+    "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable \n"
+    "kernel void image_d(read_only image3d_t image3d_td2,\n"
+    "                    write_only image3d_t image3d_td3)\n"
+    "{}\n",
+    "\n"
-static std::string generate_argument(const KernelArgInfo& kernel_arg)
-    std::string ret;
-    const bool is_image = strstr(kernel_arg.arg_type, "image")
-        || strstr(kernel_arg.arg_type, "sampler");
-    std::string address_qualifier = "";
-    // Image Objects are always allocated from the global address space so the
-    // qualifier should not be specified
-    if (!is_image)
+const char * image_3D_arg_info[][67] = {
-        address_qualifier = get_address_qualifier(kernel_arg.address_qualifier);
+        "image_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td3",
+        NULL
+    },
+const char * double_arg_info[][77] = {
+    {
+        "double_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "constantdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "constantdoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double*", "globaldoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globaldoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "globalconstdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstdoublerestrictp",
+    (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalvolatiledoublep",
+    (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalvolatiledoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalconstvolatiledoublep",
+        NULL
+    },
+    {
+        "double_scalar_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstvolatiledoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double*", "localdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localdoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "localconstdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstdoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localvolatiledoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localvolatiledoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localconstvolatiledoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstvolatiledoublerestrictp",
+        NULL
+    },
+    {
+        "double_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "doubled",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "constdoubled",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "privatedoubled",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "privateconstdoubled",
+        NULL
+    },
+    {
+        "double_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "constantdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "constantdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "globaldouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globaldouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "globalconstdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalvolatiledouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalvolatiledouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalconstvolatiledouble2p",
+        NULL
+    },
+    {
+        "double_vector2_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstvolatiledouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "localdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "localconstdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localvolatiledouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localvolatiledouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localconstvolatiledouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstvolatiledouble2restrictp",
+        NULL
+    },
+    {
+        "double_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "double2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "constdouble2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privatedouble2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privateconstdouble2d",
+        NULL
+    },
+    {
+        "double_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "constantdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "constantdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", "globaldouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globaldouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "globalconstdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalvolatiledouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalvolatiledouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalconstvolatiledouble3p",
+        NULL
+    },
+    {
+        "double_vector3_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstvolatiledouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", "localdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "localconstdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localvolatiledouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localvolatiledouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localconstvolatiledouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstvolatiledouble3restrictp",
+        NULL
+    },
+    {
+        "double_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "double3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "constdouble3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "privatedouble3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "privateconstdouble3d",
+        NULL
+    },
+    {
+        "double_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "constantdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "constantdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "globaldouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globaldouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "globalconstdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalvolatiledouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalvolatiledouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalconstvolatiledouble4p",
+        NULL
+    },
+    {
+        "double_vector4_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstvolatiledouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "localdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "localconstdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localvolatiledouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localvolatiledouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localconstvolatiledouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstvolatiledouble4restrictp",
+        NULL
+    },
+    {
+        "double_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "double4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "constdouble4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privatedouble4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privateconstdouble4d",
+        NULL
+    },
+    {
+        "double_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "constantdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "constantdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "globaldouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globaldouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "globalconstdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalvolatiledouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalvolatiledouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalconstvolatiledouble8p",
+        NULL
+    },
+    {
+        "double_vector8_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstvolatiledouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "localdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "localconstdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localvolatiledouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localvolatiledouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localconstvolatiledouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstvolatiledouble8restrictp",
+        NULL
+    },
+    {
+        "double_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "double8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "constdouble8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privatedouble8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privateconstdouble8d",
+        NULL
+    },
+    {
+        "double_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "constantdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "constantdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "globaldouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globaldouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "globalconstdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalvolatiledouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalvolatiledouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalconstvolatiledouble16p",
+        NULL
+    },
+    {
+        "double_vector16_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstvolatiledouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "localdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "localconstdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localconstdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localvolatiledouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localvolatiledouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localconstvolatiledouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localconstvolatiledouble16restrictp",
+        NULL
+    },
+    {
+        "double_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "double16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "constdouble16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privatedouble16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privateconstdouble16d",
+        NULL
+    },
+// Support for the optional half data type: OpenCL C kernel sources (the first string enables cl_khr_fp16) declaring empty-bodied kernels whose pointer arguments cover half, half2, half3, half4, half8 and half16 in the constant, global and local address spaces with const/volatile/restrict combinations.
+const char * half_kernel_args[] = {
+    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
+    "\n"
+    "kernel void half_scalar_p(constant half*constanthalfp,\n"
+    "                          constant half *restrict constanthalfrestrictp,\n"
+    "                          global half*globalhalfp,\n"
+    "                          global half *restrict globalhalfrestrictp,\n"
+    "                          global const half* globalconsthalfp,\n"
+    "                          global const half * restrict globalconsthalfrestrictp,\n"
+    "                          global volatile half*globalvolatilehalfp,\n"
+    "                          global volatile half *restrict globalvolatilehalfrestrictp,\n"
+    "                          global const volatile half* globalconstvolatilehalfp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_scalar_p2(global const volatile half * restrict globalconstvolatilehalfrestrictp,\n"
+    "                           local half*localhalfp,\n"
+    "                           local half *restrict localhalfrestrictp,\n"
+    "                           local const half* localconsthalfp,\n"
+    "                           local const half * restrict localconsthalfrestrictp,\n"
+    "                           local volatile half*localvolatilehalfp,\n"
+    "                           local volatile half *restrict localvolatilehalfrestrictp,\n"
+    "                           local const volatile half* localconstvolatilehalfp,\n"
+    "                           local const volatile half * restrict localconstvolatilehalfrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector2_p(constant half2*constanthalf2p,\n"
+    "                           constant half2 *restrict constanthalf2restrictp,\n"
+    "                           global half2*globalhalf2p,\n"
+    "                           global half2 *restrict globalhalf2restrictp,\n"
+    "                           global const half2* globalconsthalf2p,\n"
+    "                           global const half2 * restrict globalconsthalf2restrictp,\n"
+    "                           global volatile half2*globalvolatilehalf2p,\n"
+    "                           global volatile half2 *restrict globalvolatilehalf2restrictp,\n"
+    "                           global const volatile half2* globalconstvolatilehalf2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector2_p2(global const volatile half2 * restrict globalconstvolatilehalf2restrictp,\n"
+    "                            local half2*localhalf2p,\n"
+    "                            local half2 *restrict localhalf2restrictp,\n"
+    "                            local const half2* localconsthalf2p,\n"
+    "                            local const half2 * restrict localconsthalf2restrictp,\n"
+    "                            local volatile half2*localvolatilehalf2p,\n"
+    "                            local volatile half2 *restrict localvolatilehalf2restrictp,\n"
+    "                            local const volatile half2* localconstvolatilehalf2p,\n"
+    "                            local const volatile half2 * restrict localconstvolatilehalf2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector3_p(constant half3*constanthalf3p,\n"
+    "                           constant half3 *restrict constanthalf3restrictp,\n"
+    "                           global half3*globalhalf3p,\n"
+    "                           global half3 *restrict globalhalf3restrictp,\n"
+    "                           global const half3* globalconsthalf3p,\n"
+    "                           global const half3 * restrict globalconsthalf3restrictp,\n"
+    "                           global volatile half3*globalvolatilehalf3p,\n"
+    "                           global volatile half3 *restrict globalvolatilehalf3restrictp,\n"
+    "                           global const volatile half3* globalconstvolatilehalf3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector3_p2(global const volatile half3 * restrict globalconstvolatilehalf3restrictp,\n"
+    "                            local half3*localhalf3p,\n"
+    "                            local half3 *restrict localhalf3restrictp,\n"
+    "                            local const half3* localconsthalf3p,\n"
+    "                            local const half3 * restrict localconsthalf3restrictp,\n"
+    "                            local volatile half3*localvolatilehalf3p,\n"
+    "                            local volatile half3 *restrict localvolatilehalf3restrictp,\n"
+    "                            local const volatile half3* localconstvolatilehalf3p,\n"
+    "                            local const volatile half3 * restrict localconstvolatilehalf3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector4_p(constant half4*constanthalf4p,\n"
+    "                           constant half4 *restrict constanthalf4restrictp,\n"
+    "                           global half4*globalhalf4p,\n"
+    "                           global half4 *restrict globalhalf4restrictp,\n"
+    "                           global const half4* globalconsthalf4p,\n"
+    "                           global const half4 * restrict globalconsthalf4restrictp,\n"
+    "                           global volatile half4*globalvolatilehalf4p,\n"
+    "                           global volatile half4 *restrict globalvolatilehalf4restrictp,\n"
+    "                           global const volatile half4* globalconstvolatilehalf4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector4_p2(global const volatile half4 * restrict globalconstvolatilehalf4restrictp,\n"
+    "                            local half4*localhalf4p,\n"
+    "                            local half4 *restrict localhalf4restrictp,\n"
+    "                            local const half4* localconsthalf4p,\n"
+    "                            local const half4 * restrict localconsthalf4restrictp,\n"
+    "                            local volatile half4*localvolatilehalf4p,\n"
+    "                            local volatile half4 *restrict localvolatilehalf4restrictp,\n"
+    "                            local const volatile half4* localconstvolatilehalf4p,\n"
+    "                            local const volatile half4 * restrict localconstvolatilehalf4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector8_p(constant half8*constanthalf8p,\n"
+    "                           constant half8 *restrict constanthalf8restrictp,\n"
+    "                           global half8*globalhalf8p,\n"
+    "                           global half8 *restrict globalhalf8restrictp,\n"
+    "                           global const half8* globalconsthalf8p,\n"
+    "                           global const half8 * restrict globalconsthalf8restrictp,\n"
+    "                           global volatile half8*globalvolatilehalf8p,\n"
+    "                           global volatile half8 *restrict globalvolatilehalf8restrictp,\n"
+    "                           global const volatile half8* globalconstvolatilehalf8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector8_p2(global const volatile half8 * restrict globalconstvolatilehalf8restrictp,\n"
+    "                            local half8*localhalf8p,\n"
+    "                            local half8 *restrict localhalf8restrictp,\n"
+    "                            local const half8* localconsthalf8p,\n"
+    "                            local const half8 * restrict localconsthalf8restrictp,\n"
+    "                            local volatile half8*localvolatilehalf8p,\n"
+    "                            local volatile half8 *restrict localvolatilehalf8restrictp,\n"
+    "                            local const volatile half8* localconstvolatilehalf8p,\n"
+    "                            local const volatile half8 * restrict localconstvolatilehalf8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector16_p(constant half16*constanthalf16p,\n"
+    "                            constant half16 *restrict constanthalf16restrictp,\n"
+    "                            global half16*globalhalf16p,\n"
+    "                            global half16 *restrict globalhalf16restrictp,\n"
+    "                            global const half16* globalconsthalf16p,\n"
+    "                            global const half16 * restrict globalconsthalf16restrictp,\n"
+    "                            global volatile half16*globalvolatilehalf16p,\n"
+    "                            global volatile half16 *restrict globalvolatilehalf16restrictp,\n"
+    "                            global const volatile half16* globalconstvolatilehalf16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector16_p2(global const volatile half16 * restrict globalconstvolatilehalf16restrictp,\n"
+    "                             local half16*localhalf16p,\n"
+    "                             local half16 *restrict localhalf16restrictp,\n"
+    "                             local const half16* localconsthalf16p,\n"
+    "                             local const half16 * restrict localconsthalf16restrictp,\n"
+    "                             local volatile half16*localvolatilehalf16p,\n"
+    "                             local volatile half16 *restrict localvolatilehalf16restrictp,\n"
+    "                             local const volatile half16* localconstvolatilehalf16p,\n"
+    "                             local const volatile half16 * restrict localconstvolatilehalf16restrictp)\n"
+    "{}\n",
+    "\n"
+const char * half_arg_info[][77] = {
+    {
+        "half_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "constanthalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "constanthalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "globalhalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalhalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "globalconsthalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconsthalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalvolatilehalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalvolatilehalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalconstvolatilehalfp",
+        NULL
+    },
+    {
+        "half_scalar_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconstvolatilehalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "localhalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localhalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "localconsthalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localconsthalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localvolatilehalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localvolatilehalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localconstvolatilehalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localconstvolatilehalfrestrictp",
+        NULL
+    },
+    {
+        "half_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "constanthalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "constanthalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "globalhalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalhalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "globalconsthalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconsthalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalvolatilehalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalvolatilehalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalconstvolatilehalf2p",
+        NULL
+    },
+    {
+        "half_vector2_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconstvolatilehalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "localhalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localhalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "localconsthalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconsthalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localvolatilehalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localvolatilehalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localconstvolatilehalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconstvolatilehalf2restrictp",
+        NULL
+    },
+    {
+        "half_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "constanthalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "constanthalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "globalhalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalhalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "globalconsthalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalconsthalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalvolatilehalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalvolatilehalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalconstvolatilehalf3p",
+        NULL
+    },
+    {
+        "half_vector3_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalconstvolatilehalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "localhalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localhalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "localconsthalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconsthalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localvolatilehalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localvolatilehalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localconstvolatilehalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconstvolatilehalf3restrictp",
+        NULL
+    },
+    {
+        "half_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "constanthalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "constanthalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "globalhalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalhalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "globalconsthalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconsthalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "globalvolatilehalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalvolatilehalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "globalconstvolatilehalf4p",
+        NULL
+    },
+    {
+        "half_vector4_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconstvolatilehalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "localhalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localhalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "localconsthalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconsthalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localvolatilehalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localvolatilehalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localconstvolatilehalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconstvolatilehalf4restrictp",
+        NULL
+    },
+    {
+        "half_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "constanthalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "constanthalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "globalhalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalhalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "globalconsthalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconsthalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalvolatilehalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalvolatilehalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalconstvolatilehalf8p",
+        NULL
+    },
+    {
+        "half_vector8_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconstvolatilehalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "localhalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localhalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "localconsthalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconsthalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "localvolatilehalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localvolatilehalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "localconstvolatilehalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconstvolatilehalf8restrictp",
+        NULL
+    },
+    {
+        "half_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "constanthalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "constanthalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "globalhalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalhalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "globalconsthalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconsthalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "globalvolatilehalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalvolatilehalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "globalconstvolatilehalf16p",
+        NULL
+    },
+    {
+        "half_vector16_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconstvolatilehalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "localhalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localhalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "localconsthalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconsthalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localvolatilehalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localvolatilehalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localconstvolatilehalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconstvolatilehalf16restrictp",
+        NULL
+    },
+const char * long_kernel_args[] = {
+        "kernel void constant_scalar_p2(constant long* constantlongp,\n"
+        "                              constant ulong * constantulongp)\n"
+      "{}\n",
+        "kernel void constant_scalar_p3(constant unsigned long*constantunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_scalar_restrict_p2(constant long*restrict constantlongrestrictp,\n"
+        "                                       constant ulong *restrict constantulongrestrictp)\n"
+        "{}\n",
+        "kernel void constant_scalar_restrict_p3(constant unsigned long* restrict constantunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_scalar_p(global long* globallongp,\n"
+        "                            global ulong * globalulongp,\n"
+        "                            global unsigned long*globalunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_scalar_restrict_p(global long*restrict globallongrestrictp,\n"
+        "                                     global ulong *restrict globalulongrestrictp,\n"
+        "                                     global unsigned long* restrict globalunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_scalar_p(global const long* globalconstlongp,\n"
+        "                                  global const ulong * globalconstulongp,\n"
+        "                                  global const unsigned long*globalconstunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_scalar_restrict_p(global const long*restrict globalconstlongrestrictp,\n"
+        "                                           global const ulong *restrict globalconstulongrestrictp,\n"
+        "                                           global const unsigned long* restrict globalconstunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_scalar_p(global volatile long* globalvolatilelongp,\n"
+        "                                     global volatile ulong * globalvolatileulongp,\n"
+        "                                     global volatile unsigned long*globalvolatileunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_scalar_restrict_p(global volatile long*restrict globalvolatilelongrestrictp,\n"
+        "                                              global volatile ulong *restrict globalvolatileulongrestrictp,\n"
+        "                                              global volatile unsigned long* restrict globalvolatileunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_scalar_p(global const volatile long* globalconstvolatilelongp,\n"
+        "                                           global const volatile ulong * globalconstvolatileulongp,\n"
+        "                                           global const volatile unsigned long*globalconstvolatileunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_scalar_restrict_p(global const volatile long*restrict globalconstvolatilelongrestrictp,\n"
+        "                                                    global const volatile ulong *restrict globalconstvolatileulongrestrictp,\n"
+        "                                                    global const volatile unsigned long* restrict globalconstvolatileunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_scalar_p(local long* locallongp,\n"
+        "                           local ulong * localulongp,\n"
+        "                           local unsigned long*localunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_scalar_restrict_p(local long*restrict locallongrestrictp,\n"
+        "                                    local ulong *restrict localulongrestrictp,\n"
+        "                                    local unsigned long* restrict localunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_scalar_p(local const long* localconstlongp,\n"
+        "                                 local const ulong * localconstulongp,\n"
+        "                                 local const unsigned long*localconstunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_scalar_restrict_p(local const long*restrict localconstlongrestrictp,\n"
+        "                                          local const ulong *restrict localconstulongrestrictp,\n"
+        "                                          local const unsigned long* restrict localconstunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_scalar_p(local volatile long* localvolatilelongp,\n"
+        "                                    local volatile ulong * localvolatileulongp,\n"
+        "                                    local volatile unsigned long*localvolatileunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_scalar_restrict_p(local volatile long*restrict localvolatilelongrestrictp,\n"
+        "                                             local volatile ulong *restrict localvolatileulongrestrictp,\n"
+        "                                             local volatile unsigned long* restrict localvolatileunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_scalar_p(local const volatile long* localconstvolatilelongp,\n"
+        "                                          local const volatile ulong * localconstvolatileulongp,\n"
+        "                                          local const volatile unsigned long*localconstvolatileunsignedlongp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_scalar_restrict_p(local const volatile long*restrict localconstvolatilelongrestrictp,\n"
+        "                                                   local const volatile ulong *restrict localconstvolatileulongrestrictp,\n"
+        "                                                   local const volatile unsigned long* restrict localconstvolatileunsignedlongrestrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void scalar_d(long longd,\n"
+        "                     ulong ulongd,\n"
+        "                     unsigned long unsignedlongd)\n"
+        "{}\n",
+        "\n"
+        "kernel void const_scalar_d(const long constlongd,\n"
+        "                           const ulong constulongd,\n"
+        "                           const unsigned long constunsignedlongd)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_scalar_d(private long privatelongd,\n"
+        "                             private ulong privateulongd,\n"
+        "                             private unsigned long privateunsignedlongd)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_const_scalar_d(private const long privateconstlongd,\n"
+        "                                   private const ulong privateconstulongd,\n"
+        "                                   private const unsigned long privateconstunsignedlongd)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector2_p1(constant long2* constantlong2p,\n"
+        "                               constant ulong2 * constantulong2p)\n"
+      "{}\n",
+        "\n"
+        "kernel void constant_vector2_restrict_p1(constant long2 * restrict constantlong2restrictp,\n"
+        "                                        constant ulong2*restrict constantulong2restrictp)\n"
+      "{}\n",
+        "\n"
+        "kernel void global_vector2_p(global long2* globallong2p,\n"
+        "                             global ulong2 * globalulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector2_restrict_p(global long2 * restrict globallong2restrictp,\n"
+        "                                      global ulong2*restrict globalulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector2_p(global const long2*globalconstlong2p,\n"
+        "                                   global const ulong2 *globalconstulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector2_restrict_p(global const long2 *restrict globalconstlong2restrictp,\n"
+        "                                            global const ulong2* restrict globalconstulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector2_p(global volatile long2* globalvolatilelong2p,\n"
+        "                                      global volatile ulong2 * globalvolatileulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector2_restrict_p(global volatile long2 * restrict globalvolatilelong2restrictp,\n"
+        "                                               global volatile ulong2*restrict globalvolatileulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector2_p(global const volatile long2*globalconstvolatilelong2p,\n"
+        "                                            global const volatile ulong2 *globalconstvolatileulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector2_restrict_p(global const volatile long2 *restrict globalconstvolatilelong2restrictp,\n"
+        "                                                     global const volatile ulong2* restrict globalconstvolatileulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector2_p(local long2* locallong2p,\n"
+        "                            local ulong2 * localulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector2_restrict_p(local long2 * restrict locallong2restrictp,\n"
+        "                                     local ulong2*restrict localulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector2_p(local const long2*localconstlong2p,\n"
+        "                                  local const ulong2 *localconstulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector2_restrict_p(local const long2 *restrict localconstlong2restrictp,\n"
+        "                                           local const ulong2* restrict localconstulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector2_p(local volatile long2* localvolatilelong2p,\n"
+        "                                     local volatile ulong2 * localvolatileulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector2_restrict_p(local volatile long2 * restrict localvolatilelong2restrictp,\n"
+        "                                              local volatile ulong2*restrict localvolatileulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector2_p(local const volatile long2*localconstvolatilelong2p,\n"
+        "                                           local const volatile ulong2 *localconstvolatileulong2p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector2_restrict_p(local const volatile long2 *restrict localconstvolatilelong2restrictp,\n"
+        "                                                    local const volatile ulong2* restrict localconstvolatileulong2restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void vector2_d(long2 long2d,\n"
+        "                      ulong2 ulong2d)\n"
+        "{}\n",
+        "\n"
+        "kernel void const_vector2_d(const long2 constlong2d,\n"
+        "                            const ulong2 constulong2d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_vector2_d(private long2 privatelong2d,\n"
+        "                              private ulong2 privateulong2d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_const_vector2_d(private const long2 privateconstlong2d,\n"
+        "                                    private const ulong2 privateconstulong2d)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector3_p1(constant long3* constantlong3p,\n"
+        "                               constant ulong3 * constantulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector3_restrict_p1(constant long3 * restrict constantlong3restrictp,\n"
+        "                                        constant ulong3*restrict constantulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector3_p(global long3* globallong3p,\n"
+        "                             global ulong3 * globalulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector3_restrict_p(global long3 * restrict globallong3restrictp,\n"
+        "                                      global ulong3*restrict globalulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector3_p(global const long3*globalconstlong3p,\n"
+        "                                   global const ulong3 *globalconstulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector3_restrict_p(global const long3 *restrict globalconstlong3restrictp,\n"
+        "                                            global const ulong3* restrict globalconstulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector3_p(global volatile long3* globalvolatilelong3p,\n"
+        "                                      global volatile ulong3 * globalvolatileulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector3_restrict_p(global volatile long3 * restrict globalvolatilelong3restrictp,\n"
+        "                                               global volatile ulong3*restrict globalvolatileulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector3_p(global const volatile long3*globalconstvolatilelong3p,\n"
+        "                                            global const volatile ulong3 *globalconstvolatileulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector3_restrict_p(global const volatile long3 *restrict globalconstvolatilelong3restrictp,\n"
+        "                                                     global const volatile ulong3* restrict globalconstvolatileulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector3_p(local long3* locallong3p,\n"
+        "                            local ulong3 * localulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector3_restrict_p(local long3 * restrict locallong3restrictp,\n"
+        "                                     local ulong3*restrict localulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector3_p(local const long3*localconstlong3p,\n"
+        "                                  local const ulong3 *localconstulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector3_restrict_p(local const long3 *restrict localconstlong3restrictp,\n"
+        "                                           local const ulong3* restrict localconstulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector3_p(local volatile long3* localvolatilelong3p,\n"
+        "                                     local volatile ulong3 * localvolatileulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector3_restrict_p(local volatile long3 * restrict localvolatilelong3restrictp,\n"
+        "                                              local volatile ulong3*restrict localvolatileulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector3_p(local const volatile long3*localconstvolatilelong3p,\n"
+        "                                           local const volatile ulong3 *localconstvolatileulong3p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector3_restrict_p(local const volatile long3 *restrict localconstvolatilelong3restrictp,\n"
+        "                                                    local const volatile ulong3* restrict localconstvolatileulong3restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void vector3_d(long3 long3d,\n"
+        "                      ulong3 ulong3d)\n"
+        "{}\n",
+        "\n"
+        "kernel void const_vector3_d(const long3 constlong3d,\n"
+        "                            const ulong3 constulong3d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_vector3_d(private long3 privatelong3d,\n"
+        "                              private ulong3 privateulong3d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_const_vector3_d(private const long3 privateconstlong3d,\n"
+        "                                    private const ulong3 privateconstulong3d)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector4_p1(constant long4* constantlong4p,\n"
+        "                               constant ulong4 * constantulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector4_restrict_p1(constant long4 * restrict constantlong4restrictp,\n"
+        "                                        constant ulong4*restrict constantulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector4_p(global long4* globallong4p,\n"
+        "                             global ulong4 * globalulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector4_restrict_p(global long4 * restrict globallong4restrictp,\n"
+        "                                      global ulong4*restrict globalulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector4_p(global const long4*globalconstlong4p,\n"
+        "                                   global const ulong4 *globalconstulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector4_restrict_p(global const long4 *restrict globalconstlong4restrictp,\n"
+        "                                            global const ulong4* restrict globalconstulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector4_p(global volatile long4* globalvolatilelong4p,\n"
+        "                                      global volatile ulong4 * globalvolatileulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector4_restrict_p(global volatile long4 * restrict globalvolatilelong4restrictp,\n"
+        "                                               global volatile ulong4*restrict globalvolatileulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector4_p(global const volatile long4*globalconstvolatilelong4p,\n"
+        "                                            global const volatile ulong4 *globalconstvolatileulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector4_restrict_p(global const volatile long4 *restrict globalconstvolatilelong4restrictp,\n"
+        "                                                     global const volatile ulong4* restrict globalconstvolatileulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector4_p(local long4* locallong4p,\n"
+        "                            local ulong4 * localulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector4_restrict_p(local long4 * restrict locallong4restrictp,\n"
+        "                                     local ulong4*restrict localulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector4_p(local const long4*localconstlong4p,\n"
+        "                                  local const ulong4 *localconstulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector4_restrict_p(local const long4 *restrict localconstlong4restrictp,\n"
+        "                                           local const ulong4* restrict localconstulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector4_p(local volatile long4* localvolatilelong4p,\n"
+        "                                     local volatile ulong4 * localvolatileulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector4_restrict_p(local volatile long4 * restrict localvolatilelong4restrictp,\n"
+        "                                              local volatile ulong4*restrict localvolatileulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector4_p(local const volatile long4*localconstvolatilelong4p,\n"
+        "                                           local const volatile ulong4 *localconstvolatileulong4p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector4_restrict_p(local const volatile long4 *restrict localconstvolatilelong4restrictp,\n"
+        "                                                    local const volatile ulong4* restrict localconstvolatileulong4restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void vector4_d(long4 long4d,\n"
+        "                      ulong4 ulong4d)\n"
+        "{}\n",
+        "\n"
+        "kernel void const_vector4_d(const long4 constlong4d,\n"
+        "                            const ulong4 constulong4d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_vector4_d(private long4 privatelong4d,\n"
+        "                              private ulong4 privateulong4d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_const_vector4_d(private const long4 privateconstlong4d,\n"
+        "                                    private const ulong4 privateconstulong4d)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector8_p1(constant long8* constantlong8p,\n"
+        "                               constant ulong8 * constantulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector8_restrict_p1(constant long8 * restrict constantlong8restrictp,\n"
+        "                                        constant ulong8*restrict constantulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector8_p(global long8* globallong8p,\n"
+        "                             global ulong8 * globalulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector8_restrict_p(global long8 * restrict globallong8restrictp,\n"
+        "                                      global ulong8*restrict globalulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector8_p(global const long8*globalconstlong8p,\n"
+        "                                   global const ulong8 *globalconstulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector8_restrict_p(global const long8 *restrict globalconstlong8restrictp,\n"
+        "                                            global const ulong8* restrict globalconstulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector8_p(global volatile long8* globalvolatilelong8p,\n"
+        "                                      global volatile ulong8 * globalvolatileulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector8_restrict_p(global volatile long8 * restrict globalvolatilelong8restrictp,\n"
+        "                                               global volatile ulong8*restrict globalvolatileulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector8_p(global const volatile long8*globalconstvolatilelong8p,\n"
+        "                                            global const volatile ulong8 *globalconstvolatileulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector8_restrict_p(global const volatile long8 *restrict globalconstvolatilelong8restrictp,\n"
+        "                                                     global const volatile ulong8* restrict globalconstvolatileulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector8_p(local long8* locallong8p,\n"
+        "                            local ulong8 * localulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector8_restrict_p(local long8 * restrict locallong8restrictp,\n"
+        "                                     local ulong8*restrict localulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector8_p(local const long8*localconstlong8p,\n"
+        "                                  local const ulong8 *localconstulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector8_restrict_p(local const long8 *restrict localconstlong8restrictp,\n"
+        "                                           local const ulong8* restrict localconstulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector8_p(local volatile long8* localvolatilelong8p,\n"
+        "                                     local volatile ulong8 * localvolatileulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector8_restrict_p(local volatile long8 * restrict localvolatilelong8restrictp,\n"
+        "                                              local volatile ulong8*restrict localvolatileulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector8_p(local const volatile long8*localconstvolatilelong8p,\n"
+        "                                           local const volatile ulong8 *localconstvolatileulong8p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector8_restrict_p(local const volatile long8 *restrict localconstvolatilelong8restrictp,\n"
+        "                                                    local const volatile ulong8* restrict localconstvolatileulong8restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void vector8_d(long8 long8d,\n"
+        "                      ulong8 ulong8d)\n"
+        "{}\n",
+        "\n"
+        "kernel void const_vector8_d(const long8 constlong8d,\n"
+        "                            const ulong8 constulong8d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_vector8_d(private long8 privatelong8d,\n"
+        "                              private ulong8 privateulong8d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_const_vector8_d(private const long8 privateconstlong8d,\n"
+        "                                    private const ulong8 privateconstulong8d)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector16_p1(constant long16* constantlong16p,\n"
+        "                                constant ulong16 * constantulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void constant_vector16_restrict_p1(constant long16 * restrict constantlong16restrictp,\n"
+        "                                         constant ulong16*restrict constantulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector16_p(global long16* globallong16p,\n"
+        "                              global ulong16 * globalulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_vector16_restrict_p(global long16 * restrict globallong16restrictp,\n"
+        "                                       global ulong16*restrict globalulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector16_p(global const long16*globalconstlong16p,\n"
+        "                                    global const ulong16 *globalconstulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_vector16_restrict_p(global const long16 *restrict globalconstlong16restrictp,\n"
+        "                                             global const ulong16* restrict globalconstulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector16_p(global volatile long16* globalvolatilelong16p,\n"
+        "                                       global volatile ulong16 * globalvolatileulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_volatile_vector16_restrict_p(global volatile long16 * restrict globalvolatilelong16restrictp,\n"
+        "                                                global volatile ulong16*restrict globalvolatileulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector16_p(global const volatile long16*globalconstvolatilelong16p,\n"
+        "                                             global const volatile ulong16 *globalconstvolatileulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void global_const_volatile_vector16_restrict_p(global const volatile long16 *restrict globalconstvolatilelong16restrictp,\n"
+        "                                                      global const volatile ulong16* restrict globalconstvolatileulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector16_p(local long16* locallong16p,\n"
+        "                             local ulong16 * localulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_vector16_restrict_p(local long16 * restrict locallong16restrictp,\n"
+        "                                      local ulong16*restrict localulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector16_p(local const long16*localconstlong16p,\n"
+        "                                   local const ulong16 *localconstulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_vector16_restrict_p(local const long16 *restrict localconstlong16restrictp,\n"
+        "                                            local const ulong16* restrict localconstulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector16_p(local volatile long16* localvolatilelong16p,\n"
+        "                                      local volatile ulong16 * localvolatileulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_volatile_vector16_restrict_p(local volatile long16 * restrict localvolatilelong16restrictp,\n"
+        "                                               local volatile ulong16*restrict localvolatileulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector16_p(local const volatile long16*localconstvolatilelong16p,\n"
+        "                                            local const volatile ulong16 *localconstvolatileulong16p)\n"
+        "{}\n",
+        "\n"
+        "kernel void local_const_volatile_vector16_restrict_p(local const volatile long16 *restrict localconstvolatilelong16restrictp,\n"
+        "                                                     local const volatile ulong16* restrict localconstvolatileulong16restrictp)\n"
+        "{}\n",
+        "\n"
+        "kernel void vector16_d(long16 long16d,\n"
+        "                       ulong16 ulong16d)\n"
+        "{}\n",
+        "\n"
+        "kernel void const_vector16_d(const long16 constlong16d,\n"
+        "                             const ulong16 constulong16d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_vector16_d(private long16 privatelong16d,\n"
+        "                               private ulong16 privateulong16d)\n"
+        "{}\n",
+        "\n"
+        "kernel void private_const_vector16_d(private const long16 privateconstlong16d,\n"
+        "                                     private const ulong16 privateconstulong16d)\n"
+        "{}\n",
+        "\n"
+const char * long_arg_info[][72] = {
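+  // The minimum value of CL_DEVICE_MAX_CONSTANT_ARGS is 4, which is presumably why the constant-pointer cases are split across separate small kernels (_p2, _p3)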
+  {
+        "constant_scalar_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "constantlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantulongp",
+        NULL
+  },
+  {
+        "constant_scalar_p3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantunsignedlongp",
+        NULL
+    },
+  {
+        "constant_scalar_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "constantlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantulongrestrictp",
+        NULL
+  },
+  {
+        "constant_scalar_restrict_p3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "global_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "globallongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalunsignedlongp",
+        NULL
+    },
+    {
+        "global_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globallongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "global_const_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "globalconstlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstunsignedlongp",
+        NULL
+    },
+    {
+        "global_const_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "global_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileunsignedlongp",
+        NULL
+    },
+    {
+        "global_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalvolatileunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalconstvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileunsignedlongp",
+        NULL
+    },
+    {
+        "global_const_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstvolatileunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "local_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "locallongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localunsignedlongp",
+        NULL
+    },
+    {
+        "local_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "locallongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "local_const_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "localconstlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "localconstulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "localconstunsignedlongp",
+        NULL
+    },
+    {
+        "local_const_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "local_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileunsignedlongp",
+        NULL
+    },
+    {
+        "local_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localvolatileunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localconstvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localconstvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localconstvolatileunsignedlongp",
+        NULL
+    },
+    {
+        "local_const_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileunsignedlongrestrictp",
+        NULL
+    },
+    {
+        "scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "longd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "ulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "unsignedlongd",
+        NULL
+    },
+    {
+        "const_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "constlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constunsignedlongd",
+        NULL
+    },
+    {
+        "private_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privatelongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateunsignedlongd",
+        NULL
+    },
+    {
+        "private_const_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privateconstlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstunsignedlongd",
+        NULL
+    },
+    {
+        "constant_vector2_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "constantlong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "constantulong2p",
+        NULL
+    },
+    {
+        "constant_vector2_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "constantlong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "constantulong2restrictp",
+        NULL
+    },
+    {
+        "global_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "globallong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", "globalulong2p",
+        NULL
+    },
+    {
+        "global_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globallong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalulong2restrictp",
+        NULL
+    },
+    {
+        "global_const_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "globalconstlong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "globalconstulong2p",
+        NULL
+    },
+    {
+        "global_const_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstlong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstulong2restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalvolatileulong2p",
+        NULL
+    },
+    {
+        "global_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalvolatileulong2restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalconstvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalconstvolatileulong2p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstvolatileulong2restrictp",
+        NULL
+    },
+    {
+        "local_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "locallong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", "localulong2p",
+        NULL
+    },
+    {
+        "local_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "locallong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localulong2restrictp",
+        NULL
+    },
+    {
+        "local_const_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "localconstlong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "localconstulong2p",
+        NULL
+    },
+    {
+        "local_const_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstlong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstulong2restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localvolatileulong2p",
+        NULL
+    },
+    {
+        "local_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localvolatileulong2restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localconstvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localconstvolatileulong2p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstvolatileulong2restrictp",
+        NULL
+    },
+    {
+        "vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "long2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "ulong2d",
+        NULL
+    },
+    {
+        "const_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "constlong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "constulong2d",
+        NULL
+    },
+    {
+        "private_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privatelong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "privateulong2d",
+        NULL
+    },
+    {
+        "private_const_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privateconstlong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "privateconstulong2d",
+        NULL
+    },
+    {
+        "constant_vector3_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "constantlong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "constantulong3p",
+        NULL
+    },
+    {
+        "constant_vector3_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "constantlong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "constantulong3restrictp",
+        NULL
+    },
+    {
+        "global_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "globallong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "globalulong3p",
+        NULL
+    },
+    {
+        "global_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globallong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalulong3restrictp",
+        NULL
+    },
+    {
+        "global_const_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "globalconstlong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "globalconstulong3p",
+        NULL
+    },
+    {
+        "global_const_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstlong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstulong3restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalvolatileulong3p",
+        NULL
+    },
+    {
+        "global_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalvolatileulong3restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalconstvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalconstvolatileulong3p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstvolatileulong3restrictp",
+        NULL
+    },
+    {
+        "local_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "locallong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "localulong3p",
+        NULL
+    },
+    {
+        "local_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "locallong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localulong3restrictp",
+        NULL
+    },
+    {
+        "local_const_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "localconstlong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "localconstulong3p",
+        NULL
+    },
+    {
+        "local_const_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstlong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstulong3restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localvolatileulong3p",
+        NULL
+    },
+    {
+        "local_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localvolatileulong3restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localconstvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localconstvolatileulong3p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstvolatileulong3restrictp",
+        NULL
+    },
+    {
+        "vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "long3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "ulong3d",
+        NULL
+    },
+    {
+        "const_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "constlong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "constulong3d",
+        NULL
+    },
+    {
+        "private_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privatelong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateulong3d",
+        NULL
+    },
+    {
+        "private_const_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privateconstlong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateconstulong3d",
+        NULL
+    },
+    {
+        "constant_vector4_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "constantlong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "constantulong4p",
+        NULL
+    },
+    {
+        "constant_vector4_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "constantlong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "constantulong4restrictp",
+        NULL
+    },
+    {
+        "global_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "globallong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "globalulong4p",
+        NULL
+    },
+    {
+        "global_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globallong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalulong4restrictp",
+        NULL
+    },
+    {
+        "global_const_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "globalconstlong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "globalconstulong4p",
+        NULL
+    },
+    {
+        "global_const_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstlong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstulong4restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalvolatileulong4p",
+        NULL
+    },
+    {
+        "global_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalvolatileulong4restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalconstvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalconstvolatileulong4p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstvolatileulong4restrictp",
+        NULL
+    },
+    {
+        "local_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "locallong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "localulong4p",
+        NULL
+    },
+    {
+        "local_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "locallong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localulong4restrictp",
+        NULL
+    },
+    {
+        "local_const_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "localconstlong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "localconstulong4p",
+        NULL
+    },
+    {
+        "local_const_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localconstlong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstulong4restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localvolatileulong4p",
+        NULL
+    },
+    {
+        "local_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localvolatileulong4restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localconstvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localconstvolatileulong4p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localconstvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstvolatileulong4restrictp",
+        NULL
+    },
+    {
+        "vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "long4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "ulong4d",
+        NULL
+    },
+    {
+        "const_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "constlong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "constulong4d",
+        NULL
+    },
+    {
+        "private_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privatelong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateulong4d",
+        NULL
+    },
+    {
+        "private_const_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privateconstlong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateconstulong4d",
+        NULL
+    },
+    {
+        "constant_vector8_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "constantlong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "constantulong8p",
+        NULL
+    },
+    {
+        "constant_vector8_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "constantlong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "constantulong8restrictp",
+        NULL
+    },
+    {
+        "global_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "globallong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "globalulong8p",
+        NULL
+    },
+    {
+        "global_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globallong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalulong8restrictp",
+        NULL
+    },
+    {
+        "global_const_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "globalconstlong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "globalconstulong8p",
+        NULL
+    },
+    {
+        "global_const_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstlong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalconstulong8restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalvolatileulong8p",
+        NULL
+    },
+    {
+        "global_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalvolatileulong8restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalconstvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalconstvolatileulong8p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalconstvolatileulong8restrictp",
+        NULL
+    },
+    {
+        "local_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "locallong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "localulong8p",
+        NULL
+    },
+    {
+        "local_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "locallong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localulong8restrictp",
+        NULL
+    },
+    {
+        "local_const_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "localconstlong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "localconstulong8p",
+        NULL
+    },
+    {
+        "local_const_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstlong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localconstulong8restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localvolatileulong8p",
+        NULL
+    },
+    {
+        "local_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localvolatileulong8restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localconstvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localconstvolatileulong8p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localconstvolatileulong8restrictp",
+        NULL
+    },
+    {
+        "vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "long8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "ulong8d",
+        NULL
+    },
+    {
+        "const_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "constlong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "constulong8d",
+        NULL
+    },
+    {
+        "private_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "privatelong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateulong8d",
+        NULL
+    },
+    {
+        "private_const_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "privateconstlong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateconstulong8d",
+        NULL
+    },
+    {
+        "constant_vector16_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "constantlong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "constantulong16p",
+        NULL
+    },
+    {
+        "constant_vector16_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "constantlong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "constantulong16restrictp",
+        NULL
+    },
+    {
+        "global_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "globallong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "globalulong16p",
+        NULL
+    },
+    {
+        "global_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globallong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalulong16restrictp",
+        NULL
+    },
+    {
+        "global_const_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "globalconstlong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "globalconstulong16p",
+        NULL
+    },
+    {
+        "global_const_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstlong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstulong16restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "globalvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "globalvolatileulong16p",
+        NULL
+    },
+    {
+        "global_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalvolatileulong16restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "globalconstvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "globalconstvolatileulong16p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstvolatileulong16restrictp",
+        NULL
+    },
+    {
+        "local_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "locallong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "localulong16p",
+        NULL
+    },
+    {
+        "local_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "locallong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localulong16restrictp",
+        NULL
+    },
+    {
+        "local_const_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "localconstlong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "localconstulong16p",
+        NULL
+    },
+    {
+        "local_const_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstlong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstulong16restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localvolatileulong16p",
+        NULL
+    },
+    {
+        "local_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localvolatileulong16restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localconstvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localconstvolatileulong16p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstvolatileulong16restrictp",
+        NULL
+    },
+    {
+        "vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "long16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "ulong16d",
+        NULL
+    },
+    {
+        "const_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "constlong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "constulong16d",
+        NULL
+    },
+    {
+        "private_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privatelong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateulong16d",
+        NULL
+    },
+    {
+        "private_const_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privateconstlong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateconstulong16d",
+        NULL
+    },
+template<typename arg_info_t>
+int test(cl_device_id deviceID, cl_context context, kernel_args_t kernel_args, cl_uint lines_count, arg_info_t arg_info, size_t total_kernels_in_program) {
+    const size_t max_name_len = 512;
+    cl_char name[ max_name_len ];
+    cl_uint arg_count, numArgs;
+    size_t i, j, size;
+    int error;
+    clProgramWrapper program =
+    clCreateProgramWithSource(context, lines_count, kernel_args, NULL, &error);
+    if ( program == NULL || error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to create required arguments kernel program" );
+        return -1;
-    std::string access_qualifier =
-        get_access_qualifier(kernel_arg.access_qualifier);
-    std::string type_qualifier_prefix =
-        get_type_qualifier_prefix(kernel_arg.type_qualifier);
-    std::string type_qualifier_postfix =
-        get_type_qualifier_postfix(kernel_arg.type_qualifier);
+    // Compile the program
+    log_info( "Building kernels...\n" );
+    clBuildProgram( program, 1, &deviceID, "-cl-kernel-arg-info", NULL, NULL );
-    ret += address_qualifier + " ";
-    ret += access_qualifier + " ";
-    ret += type_qualifier_prefix + " ";
-    ret += kernel_arg.arg_type;
-    ret += " ";
-    ret += type_qualifier_postfix + " ";
-    ret += kernel_arg.arg_name;
-    return ret;
+    // check for build errors and exit if things didn't work
+    size_t size_ret;
+    cl_build_status build_status;
+    error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof(build_status), &build_status, &size_ret);
+    test_error( error, "Unable to query build status" );
+    if (build_status == CL_BUILD_ERROR) {
+        printf("CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status);
+        error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret);
+        test_error( error, "Unable to get build log size" );
+        char *build_log = (char *)malloc(size_ret);
+        error = clGetProgramBuildInfo(program,deviceID, CL_PROGRAM_BUILD_LOG, size_ret, build_log, &size_ret);
+        test_error( error, "Unable to get build log" );
+        printf("CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
+        printf("CL_BUILD_ERROR. exiting\n");
+        free(build_log);
+        return -1;
+    }
-/* This function generates a kernel source and allows for multiple arguments to
- * be passed in and subsequently queried. */
-static std::string generate_kernel(const std::vector<KernelArgInfo>& all_args,
-                                   const bool supports_3d_image_writes = false)
+    // Lookup the number of kernels in the program.
+    log_info( "Testing kernels...\n" );
+    size_t total_kernels = 0;
+    error = clGetProgramInfo( program, CL_PROGRAM_NUM_KERNELS, sizeof( size_t ), &total_kernels, NULL );
+    test_error( error, "Unable to get program info num kernels" );
-    std::string ret;
-    if (supports_3d_image_writes)
+    if ( total_kernels != total_kernels_in_program )
-        ret += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable\n";
+        print_error( error, "Program did not build all kernels" );
+        return -1;
-    ret += "kernel void get_kernel_arg_info(\n";
-    for (int i = 0; i < all_args.size(); ++i)
-    {
-        const KernelArgInfo& arg = all_args[i];
-        ret += generate_argument(all_args[i]);
-        if (i == all_args.size() - 1)
-        {
-            ret += "\n";
-        }
-        else
-        {
-            ret += ",\n";
-        }
-    }
-    ret += "){}";
-    return ret;
-static const char* get_kernel_arg_address_qualifier(
-    cl_kernel_arg_address_qualifier address_qualifier)
-    switch (address_qualifier)
-    {
-            return "GLOBAL";
-        }
-            return "LOCAL";
-        }
-            return "CONSTANT";
-        }
-        default: {
-            return "PRIVATE";
-        }
-    }
+    // Lookup the kernel names.
+    size_t kernel_names_len = 0;
+    error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, 0, NULL, &kernel_names_len );
+    test_error( error, "Unable to get length of kernel names list." );
-static const char*
-get_kernel_arg_access_qualifier(cl_kernel_arg_access_qualifier access_qualifier)
-    switch (access_qualifier)
+    size_t expected_kernel_names_len = 0;
+    for ( i = 0; i < total_kernels; ++i )
-            return "READ_ONLY";
-        }
-            return "WRITE_ONLY";
-        }
-            return "READ_WRITE";
-        }
-        default: {
-            return "NONE";
-        }
+        expected_kernel_names_len += 1 + strlen( arg_info[ i ][ 0 ] );
+    if ( kernel_names_len != expected_kernel_names_len )
+    {
+        log_error( "Kernel names string is not the right length, expected %d, got %d\n", (int) expected_kernel_names_len, (int) kernel_names_len );
+        return -1;
+    }
-get_kernel_arg_type_qualifier(cl_kernel_arg_type_qualifier type_qualifier)
-    std::string ret;
+    const size_t len = ( kernel_names_len + 1 ) * sizeof( char );
+    char* kernel_names = (char*) malloc( len );
+    error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, len, kernel_names, &kernel_names_len );
+    test_error( error, "Unable to get kernel names list." );
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_CONST) ret += "CONST ";
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_RESTRICT) ret += "RESTRICT ";
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_VOLATILE) ret += "VOLATILE ";
-    if (type_qualifier & CL_KERNEL_ARG_TYPE_PIPE) ret += "PIPE";
+    // Check to see if the kernel name array is null terminated.
+    if ( kernel_names[ kernel_names_len - 1 ] != '\0' )
+    {
+        free( kernel_names );
+        print_error( error, "Kernel name list was not null terminated" );
+        return -1;
+    }
-    return ret;
+    // Check to see if the correct kernel name string was returned.
+    // Does the string contain each expected kernel name?
+    for ( i = 0; i < total_kernels; ++i )
+        if ( !strstr( kernel_names, arg_info[ i ][ 0 ] ) )
+            break;
+    if ( i != total_kernels )
+    {
+        log_error( "Kernel names string is missing \"%s\"\n", arg_info[ i ][ 0 ] );
+        free( kernel_names );
+        return -1;
+    }
-static void output_difference(const KernelArgInfo& expected,
-                              const KernelArgInfo& actual)
-    if (actual.address_qualifier != expected.address_qualifier)
+    // Are the kernel names delimited by ';'?
+    if ( !strtok( kernel_names, ";" ) )
-        log_error("Address Qualifier: Expected: %s\t Actual: %s\n",
-                  get_kernel_arg_address_qualifier(expected.address_qualifier),
-                  get_kernel_arg_address_qualifier(actual.address_qualifier));
-    }
-    if (actual.access_qualifier != expected.access_qualifier)
-    {
-        log_error("Access Qualifier: Expected: %s\t Actual: %s\n",
-                  get_kernel_arg_access_qualifier(expected.access_qualifier),
-                  get_kernel_arg_access_qualifier(actual.access_qualifier));
-    }
-    if (actual.type_qualifier != expected.type_qualifier)
-    {
-        log_error(
-            "Type Qualifier: Expected: %s\t Actual: %s\n",
-            get_kernel_arg_type_qualifier(expected.type_qualifier).c_str(),
-            get_kernel_arg_type_qualifier(actual.type_qualifier).c_str());
-    }
-    if (strcmp(actual.arg_type, expected.arg_type) != 0)
-    {
-        log_error("Arg Type: Expected: %s\t Actual: %s\n", expected.arg_type,
-                  actual.arg_type);
-    }
-    if (strcmp(actual.arg_name, expected.arg_name) != 0)
-    {
-        log_error("Arg Name: Expected: %s\t Actual: %s\n", expected.arg_name,
-                  actual.arg_name);
-    }
-    log_error("Argument in Kernel Source Reported as:\n%s\n",
-              expected.arg_string.c_str());
-static int compare_expected_actual(const KernelArgInfo& expected,
-                                   const KernelArgInfo& actual)
-    ++gTestCount;
-    int ret = TEST_PASS;
-    if ((actual.address_qualifier != expected.address_qualifier)
-        || (actual.access_qualifier != expected.access_qualifier)
-        || (actual.type_qualifier != expected.type_qualifier)
-        || (strcmp(actual.arg_type, expected.arg_type) != 0)
-        || (strcmp(actual.arg_name, expected.arg_name) != 0))
-    {
-        ret = TEST_FAIL;
-        output_difference(expected, actual);
-        ++gFailCount;
-    }
-    return ret;
-static bool device_supports_pipes(cl_device_id deviceID)
-    auto version = get_device_cl_version(deviceID);
-    {
-        return false;
-    }
-    cl_uint max_packet_size = 0;
-    cl_int err =
-        clGetDeviceInfo(deviceID, CL_DEVICE_PIPE_MAX_PACKET_SIZE,
-                        sizeof(max_packet_size), &max_packet_size, nullptr);
-    test_error_ret(err, "clGetDeviceInfo", false);
-    if ((max_packet_size == 0) && (version >= Version(3, 0)))
-    {
-        return false;
-    }
-    return true;
-static std::string get_build_options(cl_device_id deviceID)
-    std::string ret = "-cl-kernel-arg-info";
-    if (get_device_cl_version(deviceID) >= MINIMUM_OPENCL_PIPE_VERSION)
-    {
-        if (device_supports_pipes(deviceID))
-        {
-            if (get_device_cl_version(deviceID) >= Version(3, 0))
-            {
-                ret += " -cl-std=CL3.0";
-            }
-            else
-            {
-                ret += " -cl-std=CL2.0";
-            }
-        }
-    }
-    return ret;
-static std::string get_expected_arg_type(const std::string& type_string,
-                                         const bool is_pointer)
-    bool is_unsigned = false;
-    std::istringstream type_stream(type_string);
-    std::string base_type = "";
-    std::string ret = "";
-    /* Signed and Unsigned on their own represent an int */
-    if (type_string == "signed" || type_string == "signed*")
-    {
-        base_type = "int";
-    }
-    else if (type_string == "unsigned" || type_string == "unsigned*")
-    {
-        base_type = "int";
-        is_unsigned = true;
+        error = -1;
-        std::string token;
-        /* Iterate through the argument type to determine what the type is and
-         * whether or not it is signed */
-        while (std::getline(type_stream, token, ' '))
+        for ( i = 1; i < total_kernels; ++i )
-            if (token.find("unsigned") != std::string::npos)
+            if ( !strtok( NULL, ";" ) )
-                is_unsigned = true;
-            }
-            if (token.find("signed") == std::string::npos)
-            {
-                base_type = token;
+                error = -1;
-    ret = base_type;
-    if (is_unsigned)
+    if ( error )
-        ret.insert(0, "u");
+        log_error( "Kernel names string was not properly delimited by ';'\n" );
+        free( kernel_names );
+        return -1;
-    /* Ensure that the data type is a pointer if it is not already when
-     * necessary */
-    if (is_pointer && ret.back() != '*')
-    {
-        ret += "*";
-    }
-    return ret;
+    free( kernel_names );
-static KernelArgInfo
-create_expected_arg_info(const KernelArgInfo& kernel_argument, bool is_pointer)
-    KernelArgInfo ret = kernel_argument;
-    const std::string arg_string = generate_argument(kernel_argument);
-    ret.arg_string = arg_string;
-    std::string type_string(kernel_argument.arg_type);
-    /* We only need to modify the expected return values for scalar types */
-    if ((is_pointer && !isdigit(type_string.back() - 1))
-        || !isdigit(type_string.back()))
+    // Create kernel objects and query them.
+    int rc = 0;
+    for ( i = 0; i < total_kernels; ++i )
-        std::string expected_arg_type =
-            get_expected_arg_type(type_string, is_pointer);
-        /* Reset the Contents of expected arg_type char[] and then assign it to
-         * the expected value */
-        memset(ret.arg_type, 0, sizeof(ret.arg_type));
-        strcpy(ret.arg_type, expected_arg_type.c_str());
-    }
-    /* Any values passed by reference has TYPE_NONE */
-    if (!is_pointer)
-    {
-        ret.type_qualifier = CL_KERNEL_ARG_TYPE_NONE;
-    }
-    /* If the address qualifier is CONSTANT we expect to see the TYPE_CONST
-     * qualifier*/
-    if (kernel_argument.address_qualifier == CL_KERNEL_ARG_ADDRESS_CONSTANT)
-    {
-        ret.type_qualifier |= CL_KERNEL_ARG_TYPE_CONST;
-    }
-    /* The PIPE qualifier is special. It can only be used in a global scope. It
-     * also ignores any other qualifiers */
-    if (kernel_argument.type_qualifier & CL_KERNEL_ARG_TYPE_PIPE)
-    {
-        ret.address_qualifier = CL_KERNEL_ARG_ADDRESS_GLOBAL;
-        ret.type_qualifier = CL_KERNEL_ARG_TYPE_PIPE;
-    }
-    return ret;
-/* There are too many vector arguments for it to be worth writing down
- * statically and are instead generated here and combined with all of the scalar
- * and unsigned scalar types in a single data structure */
-static std::vector<std::string>
-generate_all_type_arguments(cl_device_id deviceID)
-    std::vector<std::string> ret = {
-        "char",           "short",        "int",           "float",
-        "void",           "uchar",        "unsigned char", "ushort",
-        "unsigned short", "uint",         "unsigned int",  "char unsigned",
-        "short unsigned", "int unsigned", "signed short",  "signed int",
-        "signed long",    "short signed", "int signed",    "signed",
-        "unsigned"
-    };
-    std::vector<std::string> vector_types = { "char",   "uchar", "short",
-                                              "ushort", "int",   "uint",
-                                              "float" };
-    if (gHasLong)
-    {
-        ret.push_back("long");
-        ret.push_back("ulong");
-        ret.push_back("unsigned long");
-        ret.push_back("long unsigned");
-        ret.push_back("long signed");
-        vector_types.push_back("long");
-        vector_types.push_back("ulong");
-    }
-    if (device_supports_half(deviceID))
-    {
-        vector_types.push_back("half");
-    }
-    if (device_supports_double(deviceID))
-    {
-        vector_types.push_back("double");
-    }
-    static const std::vector<std::string> vector_values = { "2", "3", "4", "8",
-                                                            "16" };
-    for (auto vector_type : vector_types)
-    {
-        for (auto vector_value : vector_values)
+        int kernel_rc = 0;
+        const char* kernel_name = arg_info[ i ][ 0 ];
+        clKernelWrapper kernel = clCreateKernel(program, kernel_name, &error);
+        if( kernel == NULL || error != CL_SUCCESS )
-            ret.push_back(vector_type + vector_value);
+            log_error( "ERROR: Could not get kernel: %s\n", kernel_name );
+            kernel_rc = -1;
-    }
-    return ret;
-static int
-compare_kernel_with_expected(cl_context context, cl_device_id deviceID,
-                             const char* kernel_src,
-                             const std::vector<KernelArgInfo>& expected_args)
-    int failed_tests = 0;
-    clKernelWrapper kernel;
-    clProgramWrapper program;
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
-        get_build_options(deviceID).c_str());
-    test_error(err, "create_single_kernel_helper_with_build_options");
-    for (int i = 0; i < expected_args.size(); ++i)
-    {
-        KernelArgInfo actual;
-        err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_ADDRESS_QUALIFIER,
-                                 sizeof(actual.address_qualifier),
-                                 &(actual.address_qualifier), nullptr);
-        test_error(err, "clGetKernelArgInfo");
-        err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_ACCESS_QUALIFIER,
-                                 sizeof(actual.access_qualifier),
-                                 &(actual.access_qualifier), nullptr);
-        test_error(err, "clGetKernelArgInfo");
-        err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_TYPE_QUALIFIER,
-                                 sizeof(actual.type_qualifier),
-                                 &(actual.type_qualifier), nullptr);
-        test_error(err, "clGetKernelArgInfo");
-        err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_TYPE_NAME,
-                                 sizeof(actual.arg_type), &(actual.arg_type),
-                                 nullptr);
-        test_error(err, "clGetKernelArgInfo");
-        err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME,
-                                 sizeof(actual.arg_name), &(actual.arg_name),
-                                 nullptr);
-        test_error(err, "clGetKernelArgInfo");
-        failed_tests += compare_expected_actual(expected_args[i], actual);
-    }
-    return failed_tests;
-size_t get_param_size(const std::string& arg_type, cl_device_id deviceID,
-                      bool is_pipe)
-    if (is_pipe)
-    {
-        return (sizeof(int*));
-    }
-    if (arg_type.find("*") != std::string::npos)
-    {
-        cl_uint device_address_bits = 0;
-        cl_int err = clGetDeviceInfo(deviceID, CL_DEVICE_ADDRESS_BITS,
-                                     sizeof(device_address_bits),
-                                     &device_address_bits, NULL);
-        return (device_address_bits / 8);
-    }
-    size_t ret(0);
-    if (arg_type.find("char") != std::string::npos)
-    {
-        ret += sizeof(cl_char);
-    }
-    if (arg_type.find("short") != std::string::npos)
-    {
-        ret += sizeof(cl_short);
-    }
-    if (arg_type.find("half") != std::string::npos)
-    {
-        ret += sizeof(cl_half);
-    }
-    if (arg_type.find("int") != std::string::npos)
-    {
-        ret += sizeof(cl_int);
-    }
-    if (arg_type.find("long") != std::string::npos)
-    {
-        ret += sizeof(cl_long);
-    }
-    if (arg_type.find("float") != std::string::npos)
-    {
-        ret += sizeof(cl_float);
-    }
-    if (arg_type.find("double") != std::string::npos)
-    {
-        ret += sizeof(cl_double);
-    }
-    if (arg_type.back() == '2')
-    {
-        ret *= 2;
-    }
-    if (arg_type.back() == '3')
-    {
-        ret *= 4;
-    }
-    if (arg_type.back() == '4')
-    {
-        ret *= 4;
-    }
-    if (arg_type.back() == '8')
-    {
-        ret *= 8;
-    }
-    // If the last character is a 6 it represents a vector of 16
-    if (arg_type.back() == '6')
-    {
-        ret *= 16;
-    }
-    return ret;
-static int run_scalar_vector_tests(cl_context context, cl_device_id deviceID)
-    int failed_tests = 0;
-    std::vector<std::string> type_arguments =
-        generate_all_type_arguments(deviceID);
-    const std::vector<cl_kernel_arg_access_qualifier> access_qualifiers = {
-    };
-    std::vector<KernelArgInfo> all_args, expected_args;
-    size_t max_param_size = get_max_param_size(deviceID);
-    size_t total_param_size(0);
-    for (auto address_qualifier : address_qualifiers)
-    {
-        bool is_private = (address_qualifier == CL_KERNEL_ARG_ADDRESS_PRIVATE);
-        /* OpenCL kernels cannot take "private" pointers and only "private"
-         * variables can take values */
-        bool is_pointer = !is_private;
-        for (auto type_qualifier : type_qualifiers)
+        if(kernel_rc == 0)
-            bool is_pipe = (type_qualifier & CL_KERNEL_ARG_TYPE_PIPE);
-            bool is_restrict = (type_qualifier & CL_KERNEL_ARG_TYPE_RESTRICT);
+            // Determine the expected number of arguments.
+            arg_count = 0;
+            while (arg_info[ i ][ (ARG_INFO_FIELD_COUNT * arg_count) + 1 ] != NULL)
+                ++arg_count;
-            for (auto access_qualifier : access_qualifiers)
+            // Try to get the number of arguments.
+            error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &size );
+            test_error( error, "Unable to get kernel arg count param size" );
+            if( size != sizeof( numArgs ) )
-                bool has_access_qualifier =
-                    (access_qualifier != CL_KERNEL_ARG_ACCESS_NONE);
+                log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d) for kernel: %s\n", (int)sizeof( numArgs ), (int)size, kernel_name );
+                kernel_rc = -1;
+            }
+        }
-                /*Only images and pipes can have an access qualifier,
-                 * otherwise it should be ACCESS_NONE */
-                if (!is_pipe && has_access_qualifier)
+        if(kernel_rc == 0)
+        {
+            error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL );
+            test_error( error, "Unable to get kernel arg count" );
+            if( numArgs != arg_count )
+            {
+                log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d) for kernel: %s\n", arg_count, numArgs, kernel_name );
+                kernel_rc = -1;
+            }
+        }
+        if(kernel_rc == 0)
+        {
+            for ( j = 0; j < numArgs; ++j )
+            {
+                int arg_rc = 0;
+                cl_kernel_arg_address_qualifier expected_address_qualifier = (cl_kernel_arg_address_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ADDR_OFFSET ];
+                cl_kernel_arg_access_qualifier expected_access_qualifier =  (cl_kernel_arg_access_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ACCESS_OFFSET ];
+                cl_kernel_arg_type_qualifier expected_type_qualifier = (cl_kernel_arg_type_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_QUAL_OFFSET ];
+                const char* expected_type_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_NAME_OFFSET ];
+                const char* expected_arg_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ARG_NAME_OFFSET ];
+                // Try to get the address qualifier of each argument.
+                cl_kernel_arg_address_qualifier address_qualifier = 0;
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof address_qualifier, &address_qualifier, &size );
+                test_error( error, "Unable to get argument address qualifier" );
+                error = (address_qualifier != expected_address_qualifier);
+                if ( error )
-                    continue;
+                    log_error( "ERROR: Bad address qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_address_qualifier, (unsigned int)address_qualifier );
+                    arg_rc = -1;
-                /* If the type is a pipe, then either the specified or
-                 * default access qualifier is returned and so "NONE" will
-                 * never be returned */
-                if (is_pipe && !has_access_qualifier)
+                // Try to get the access qualifier of each argument.
+                cl_kernel_arg_access_qualifier access_qualifier = 0;
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ACCESS_QUALIFIER, sizeof access_qualifier, &access_qualifier, &size );
+                test_error( error, "Unable to get argument access qualifier" );
+                error = (access_qualifier != expected_access_qualifier);
+                if ( error )
-                    continue;
+                    log_error( "ERROR: Bad access qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_access_qualifier, (unsigned int)access_qualifier );
+                    arg_rc = -1;
-                /* The "restrict" type qualifier can only apply to
-                 * pointers
-                 */
-                if (is_restrict && !is_pointer)
+                // Try to get the type qualifier of each argument.
+                cl_kernel_arg_type_qualifier arg_type_qualifier = 0;
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof arg_type_qualifier, &arg_type_qualifier, &size );
+                test_error( error, "Unable to get argument type qualifier" );
+                error = (arg_type_qualifier != expected_type_qualifier);
+                if ( error )
-                    continue;
+                    log_error( "ERROR: Bad type qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_type_qualifier, (unsigned int)arg_type_qualifier );
+                    arg_rc = -1;
-                /* We cannot have pipe pointers */
-                if (is_pipe && is_pointer)
+                // Try to get the type of each argument.
+                memset( name, 0, max_name_len );
+                error = clGetKernelArgInfo(kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_NAME, max_name_len, name, &size );
+                test_error( error, "Unable to get argument type name" );
+                error = strcmp( (const char*) name, expected_type_name );
+                if ( error )
-                    continue;
+                    log_error( "ERROR: Bad argument type name, kernel: \"%s\", argument number: %d, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_type_name, name );
+                    arg_rc = -1;
-                for (auto arg_type : type_arguments)
+                // Try to get the name of each argument.
+                memset( name, 0, max_name_len );
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_NAME, max_name_len, name, &size );
+                test_error( error, "Unable to get argument name" );
+                error = strcmp( (const char*) name, expected_arg_name );
+                if ( error )
-                    /* Void Types cannot be private */
-                    if (is_private && arg_type == "void")
-                    {
-                        continue;
-                    }
+                    log_error( "ERROR: Bad argument name, kernel: \"%s\", argument number: %d, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_arg_name, name );
+                    arg_rc = -1;
+                }
-                    if (is_pointer)
-                    {
-                        arg_type += "*";
-                    }
-                    size_t param_size =
-                        get_param_size(arg_type, deviceID, is_pipe);
-                    if (param_size + total_param_size >= max_param_size
-                        || all_args.size() == MAX_NUMBER_OF_KERNEL_ARGS)
-                    {
-                        const std::string kernel_src =
-                            generate_kernel(all_args);
-                        failed_tests += compare_kernel_with_expected(
-                            context, deviceID, kernel_src.c_str(),
-                            expected_args);
-                        all_args.clear();
-                        expected_args.clear();
-                        total_param_size = 0;
-                    }
-                    total_param_size += param_size;
-                    KernelArgInfo kernel_argument(
-                        address_qualifier, access_qualifier, type_qualifier,
-                        arg_type, all_args.size());
-                    expected_args.push_back(
-                        create_expected_arg_info(kernel_argument, is_pointer));
-                    all_args.push_back(kernel_argument);
+                if(arg_rc != 0) {
+                    kernel_rc = -1;
-    }
-    const std::string kernel_src = generate_kernel(all_args);
-    failed_tests += compare_kernel_with_expected(
-        context, deviceID, kernel_src.c_str(), expected_args);
-    return failed_tests;
-static cl_uint get_max_number_of_pipes(cl_device_id deviceID, cl_int& err)
-    cl_uint ret(0);
-    err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PIPE_ARGS, sizeof(ret), &ret,
-                          nullptr);
-    return ret;
-static int run_pipe_tests(cl_context context, cl_device_id deviceID)
-    int failed_tests = 0;
-    cl_kernel_arg_address_qualifier address_qualifier =
-    std::vector<std::string> type_arguments =
-        generate_all_type_arguments(deviceID);
-    const std::vector<cl_kernel_arg_access_qualifier> access_qualifiers = {
-    };
-    std::vector<KernelArgInfo> all_args, expected_args;
-    size_t max_param_size = get_max_param_size(deviceID);
-    size_t total_param_size(0);
-    cl_int err = CL_SUCCESS;
-    cl_uint max_number_of_pipes = get_max_number_of_pipes(deviceID, err);
-    test_error_ret(err, "get_max_number_of_pipes", TEST_FAIL);
-    cl_uint number_of_pipes(0);
-    const bool is_pointer = false;
-    const bool is_pipe = true;
-    for (auto type_qualifier : pipe_qualifiers)
-    {
-        for (auto access_qualifier : access_qualifiers)
-        {
-            for (auto arg_type : type_arguments)
-            {
-                /* We cannot have void pipes */
-                if (arg_type == "void")
-                {
-                    continue;
-                }
-                size_t param_size = get_param_size(arg_type, deviceID, is_pipe);
-                if (param_size + total_param_size >= max_param_size
-                    || number_of_pipes == max_number_of_pipes)
-                {
-                    const std::string kernel_src = generate_kernel(all_args);
-                    failed_tests += compare_kernel_with_expected(
-                        context, deviceID, kernel_src.c_str(), expected_args);
-                    all_args.clear();
-                    expected_args.clear();
-                    total_param_size = 0;
-                    number_of_pipes = 0;
-                }
-                total_param_size += param_size;
-                number_of_pipes++;
-                KernelArgInfo kernel_argument(address_qualifier,
-                                              access_qualifier, type_qualifier,
-                                              arg_type, all_args.size());
-                expected_args.push_back(
-                    create_expected_arg_info(kernel_argument, is_pointer));
-                all_args.push_back(kernel_argument);
-            }
+        //log_info( "%s ... %s\n",arg_info[i][0],kernel_rc == 0 ? "passed" : "failed" );
+        if(kernel_rc != 0) {
+            rc = -1;
-    const std::string kernel_src = generate_kernel(all_args);
-    failed_tests += compare_kernel_with_expected(
-        context, deviceID, kernel_src.c_str(), expected_args);
-    return failed_tests;
+  return rc;
-static int run_sampler_test(cl_context context, cl_device_id deviceID)
-    cl_kernel_arg_address_qualifier address_qualifier =
-    cl_kernel_arg_type_qualifier type_qualifier = CL_KERNEL_ARG_TYPE_NONE;
-    cl_kernel_arg_access_qualifier access_qualifier = CL_KERNEL_ARG_ACCESS_NONE;
-    std::string image_type = "sampler_t";
-    bool is_pointer = false;
-    KernelArgInfo kernel_argument(address_qualifier, access_qualifier,
-                                  type_qualifier, image_type,
-                                  SINGLE_KERNEL_ARG_NUMBER);
-    KernelArgInfo expected =
-        create_expected_arg_info(kernel_argument, is_pointer);
-    const std::string kernel_src = generate_kernel({ kernel_argument });
-    return compare_kernel_with_expected(context, deviceID, kernel_src.c_str(),
-                                        { expected });
-static int run_image_tests(cl_context context, cl_device_id deviceID)
-    int failed_tests = 0;
-    bool supports_3d_image_writes =
-        is_extension_available(deviceID, "cl_khr_3d_image_writes");
-    bool is_pointer = false;
-    cl_kernel_arg_type_qualifier type_qualifier = CL_KERNEL_ARG_TYPE_NONE;
-    cl_kernel_arg_address_qualifier address_qualifier =
-    for (auto access_qualifier : access_qualifiers)
-    {
-        bool is_write =
-            (access_qualifier == CL_KERNEL_ARG_ACCESS_WRITE_ONLY
-             || access_qualifier == CL_KERNEL_ARG_ACCESS_READ_WRITE);
-        for (auto image_type : image_arguments)
-        {
-            bool is_3d_image = image_type == "image3d_t";
-            /* We can only test 3d image writes if our device supports it */
-            if (is_3d_image && is_write)
-            {
-                if (!supports_3d_image_writes)
-                {
-                    continue;
-                }
-            }
-            KernelArgInfo kernel_argument(address_qualifier, access_qualifier,
-                                          type_qualifier, image_type,
-                                          SINGLE_KERNEL_ARG_NUMBER);
-            KernelArgInfo expected =
-                create_expected_arg_info(kernel_argument, is_pointer);
-            const std::string kernel_src =
-                generate_kernel({ kernel_argument }, supports_3d_image_writes);
-            failed_tests += compare_kernel_with_expected(
-                context, deviceID, kernel_src.c_str(), { expected });
-        }
-    }
-    failed_tests += run_sampler_test(context, deviceID);
-    return failed_tests;
-/* Ensure clGetKernelArgInfo returns successfully when param_value is
- * set to null */
-static int test_null_param(cl_context context, cl_device_id deviceID,
-                           char const* kernel_src)
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
-        get_build_options(deviceID).c_str());
-    test_error_ret(err, "create_single_kernel_helper_with_build_options",
-                   TEST_FAIL);
-    err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER,
-                             CL_KERNEL_ARG_ADDRESS_QUALIFIER, 0, nullptr,
-                             nullptr);
-    test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL);
-    err =
-        clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER,
-                           CL_KERNEL_ARG_ACCESS_QUALIFIER, 0, nullptr, nullptr);
-    test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL);
-    err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER,
-                             CL_KERNEL_ARG_TYPE_QUALIFIER, 0, nullptr, nullptr);
-    test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL);
-    err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER,
-                             CL_KERNEL_ARG_TYPE_NAME, 0, nullptr, nullptr);
-    test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL);
-    err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER,
-                             CL_KERNEL_ARG_NAME, 0, nullptr, nullptr);
-    test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL);
-    return TEST_PASS;
-/* Ensure clGetKernelArgInfo returns the correct size in bytes for the
- * kernel arg name */
-static int test_arg_name_size(cl_context context, cl_device_id deviceID,
-                              char const* kernel_src)
+int    test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
     size_t size;
-    /* We are adding +1 because the argument used in this kernel is argument0
-     * which has 1 extra character than just the base argument name */
-    char arg_return[sizeof(KERNEL_ARGUMENT_NAME) + 1];
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
-        get_build_options(deviceID).c_str());
+    int error;
-    test_error_ret(err, "create_single_kernel_helper_with_build_options",
-                   TEST_FAIL);
+    cl_bool supports_double = 0; // assume not
+    cl_bool supports_half = 0; // assume not
+    cl_bool supports_images = 0; // assume not
+    cl_bool supports_long = 0; // assume not
+    cl_bool supports_3D_images = 0; // assume not
-    err =
-        clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, CL_KERNEL_ARG_NAME,
-                           sizeof(arg_return), &arg_return, &size);
-    test_error_ret(err, "clGetKernelArgInfo", TEST_FAIL);
-    if (size == sizeof(KERNEL_ARGUMENT_NAME) + 1)
-    {
-        return TEST_PASS;
-    }
-    else
-    {
-        return TEST_FAIL;
-    }
+    // Check if this device supports images
+  error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof supports_images, &supports_images, NULL);
+  test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed");
-static int run_boundary_tests(cl_context context, cl_device_id deviceID)
-    int failed_tests = 0;
+  if (supports_images) {
+    log_info(" o Device supports images\n");
+    log_info(" o Expecting SUCCESS when testing image kernel arguments.\n");
+  }
+  else {
+    log_info(" o Device lacks image support\n");
+    log_info(" o Not testing image kernel arguments.\n");
+  }
-    cl_kernel_arg_address_qualifier address_qualifier =
-    cl_kernel_arg_access_qualifier access_qualifier = CL_KERNEL_ARG_ACCESS_NONE;
-    cl_kernel_arg_type_qualifier type_qualifier = CL_KERNEL_ARG_TYPE_NONE;
-    std::string arg_type = "int*";
-    KernelArgInfo arg_info(address_qualifier, access_qualifier, type_qualifier,
-                           arg_type, SINGLE_KERNEL_ARG_NUMBER);
-    const std::string kernel_src = generate_kernel({ arg_info });
-    failed_tests += test_arg_name_size(context, deviceID, kernel_src.c_str());
-    if (test_null_param(context, deviceID, kernel_src.c_str()) != TEST_PASS)
-    {
-        failed_tests++;
-    }
-    return failed_tests;
-static int run_all_tests(cl_context context, cl_device_id deviceID)
-    int failed_scalar_tests = run_scalar_vector_tests(context, deviceID);
-    if (failed_scalar_tests == 0)
-    {
-        log_info("All Data Type Tests Passed\n");
-    }
-    else
-    {
-        log_error("%d Data Type Test(s) Failed\n", failed_scalar_tests);
-    }
-    int failed_image_tests = 0;
-    if (checkForImageSupport(deviceID) == 0)
-    {
-        failed_image_tests = run_image_tests(context, deviceID);
-        if (failed_image_tests == 0)
-        {
-            log_info("All Image Tests Passed\n");
-        }
-        else
-        {
-            log_error("%d Image Test(s) Failed\n", failed_image_tests);
-        }
-    }
-    int failed_pipe_tests = 0;
-    // TODO
-    if (false)
-    {
-        failed_pipe_tests = run_pipe_tests(context, deviceID);
-        if (failed_pipe_tests == 0)
-        {
-            log_info("All Pipe Tests Passed\n");
-        }
-        else
-        {
-            log_error("%d Pipe Test(s) Failed\n", failed_pipe_tests);
+    if (is_extension_available(deviceID, "cl_khr_fp64")) {
+        log_info(" o Device claims extension 'cl_khr_fp64'\n");
+        log_info(" o Expecting SUCCESS when testing double kernel arguments.\n");
+        supports_double = 1;
+    } else {
+        cl_device_fp_config double_fp_config;
+        error = clGetDeviceInfo(deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(double_fp_config), &double_fp_config, NULL);
+        test_error(error, "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed");
+        if (double_fp_config != 0)
+            supports_double = 1;
+        else {
+            log_info(" o Device lacks extension 'cl_khr_fp64'\n");
+            log_info(" o Not testing double kernel arguments.\n");
+            supports_double = 0;
-    int failed_boundary_tests = run_boundary_tests(context, deviceID);
-    if (failed_boundary_tests == 0)
-    {
-        log_info("All Edge Case Tests Passed\n");
-    }
-    else
-    {
-        log_error("%d Edge Case Test(s) Failed\n", failed_boundary_tests);
+    if (is_extension_available(deviceID, "cl_khr_fp16")) {
+        log_info(" o Device claims extension 'cl_khr_fp16'\n");
+        log_info(" o Expecting SUCCESS when testing halfn* kernel arguments.\n");
+        supports_half = 1;
+    } else {
+        log_info(" o Device lacks extension 'cl_khr_fp16'\n");
+        log_info(" o Not testing halfn* kernel arguments.\n");
+        supports_half = 0;
-    return (failed_scalar_tests + failed_image_tests + failed_pipe_tests
-            + failed_boundary_tests);
+    if (is_extension_available(deviceID, "cl_khr_int64"))
+    {
+        log_info(" o Device claims extension 'cl_khr_int64'\n");
+        log_info(" o Expecting SUCCESS when testing long kernel arguments.\n");
+        supports_long = 1;
+    } else
+    {
+        log_info(" o Device lacks extension 'cl_khr_int64'\n");
+        log_info(" o Not testing long kernel arguments.\n");
+        supports_long = 0;
+    }
-int test_get_kernel_arg_info(cl_device_id deviceID, cl_context context,
-                             cl_command_queue queue, int num_elements)
-    int failed_tests = run_all_tests(context, deviceID);
-    if (failed_tests != 0)
+    error = checkFor3DImageSupport(deviceID);
-        log_error("%d Test(s) Failed\n", failed_tests);
-        return TEST_FAIL;
-    }
-    else
+        log_info(" o Device supports 3D images\n");
+        log_info(" o Expecting SUCCESS when testing 3D image kernel arguments.\n");
+        supports_3D_images = 1;
+    } else
-        return TEST_PASS;
+        log_info(" o Device lacks 3D image support\n");
+        log_info(" o Not testing 3D image kernel arguments.\n");
+        supports_3D_images = 0;
+    int test_failed = 0;
+    // Now create a test program using required arguments
+    log_info("Testing required kernel arguments...\n");
+    error = test(deviceID, context, required_kernel_args, sizeof(required_kernel_args)/sizeof(required_kernel_args[0]), required_arg_info, sizeof(required_arg_info)/sizeof(required_arg_info[0]));
+    test_failed = (error) ? -1 : test_failed;
+    if ( supports_images )
+    {
+        log_info("Testing optional image arguments...\n");
+        error = test(deviceID, context, image_kernel_args, sizeof(image_kernel_args)/sizeof(image_kernel_args[0]), image_arg_info, sizeof(image_arg_info)/sizeof(image_arg_info[0]));
+        test_failed = (error) ? -1 : test_failed;
+    }
+    if ( supports_double )
+    {
+        log_info("Testing optional double arguments...\n");
+        error = test(deviceID, context, double_kernel_args, sizeof(double_kernel_args)/sizeof(double_kernel_args[0]), double_arg_info, sizeof(double_arg_info)/sizeof(double_arg_info[0]));
+        test_failed = (error) ? -1 : test_failed;
+    }
+    if ( supports_half )
+    {
+        log_info("Testing optional half arguments...\n");
+        error = test(deviceID, context, half_kernel_args, sizeof(half_kernel_args)/sizeof(half_kernel_args[0]), half_arg_info, sizeof(half_arg_info)/sizeof(half_arg_info[0]));
+        test_failed = (error) ? -1 : test_failed;
+    }
+    if ( supports_long )
+    {
+        log_info("Testing optional long arguments...\n");
+        error = test(deviceID, context, long_kernel_args, sizeof(long_kernel_args)/sizeof(long_kernel_args[0]), long_arg_info, sizeof(long_arg_info)/sizeof(long_arg_info[0]));
+        test_failed = (error) ? -1 : test_failed;
+    }
+    if ( supports_3D_images )
+    {
+        log_info("Testing optional 3D image arguments...\n");
+        error = test(deviceID, context, image_3D_kernel_args, sizeof(image_3D_kernel_args)/sizeof(image_3D_kernel_args[0]), image_3D_arg_info, sizeof(image_3D_arg_info)/sizeof(image_3D_arg_info[0]));
+        test_failed = (error) ? -1 : test_failed;
+    }
+    return test_failed;
diff --git a/test_conformance/api/test_kernel_arg_info_compatibility.cpp b/test_conformance/api/test_kernel_arg_info_compatibility.cpp
new file mode 100644
index 0000000..a6b60c2
--- /dev/null
+++ b/test_conformance/api/test_kernel_arg_info_compatibility.cpp
@@ -0,0 +1,5159 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "testBase.h"
+#include <limits.h>
+#include <ctype.h>
+#ifndef _WIN32
+#include <unistd.h>
+#define ARG_INFO_FIELD_COUNT        5
+#define ARG_INFO_ADDR_OFFSET        1
+#define ARG_INFO_ACCESS_OFFSET        2
+typedef char const * kernel_args_t[];
+static kernel_args_t required_kernel_args = {
+    "typedef float4 typedef_type;\n"
+    "\n"
+    "typedef struct struct_type {\n"
+    "    float4 float4d;\n"
+    "    int intd;\n"
+    "} typedef_struct_type;\n"
+    "\n"
+    "typedef union union_type {\n"
+    "    float4 float4d;\n"
+    "    uint4 uint4d;\n"
+    "} typedef_union_type;\n"
+    "\n"
+    "typedef enum enum_type {\n"
+    "    enum_type_zero,\n"
+    "    enum_type_one,\n"
+    "    enum_type_two\n"
+    "} typedef_enum_type;\n"
+    "\n"
+    "kernel void constant_scalar_p0(constant void*constantvoidp,\n"
+    "                              constant char *constantcharp,\n"
+    "                              constant uchar* constantucharp,\n"
+    "                              constant unsigned char * constantunsignedcharp)\n"
+  "{}\n",
+    "kernel void constant_scalar_p1(constant short*constantshortp,\n"
+    "                              constant ushort *constantushortp,\n"
+    "                              constant unsigned short* constantunsignedshortp,\n"
+    "                              constant int * constantintp)\n"
+  "{}\n",
+    "kernel void constant_scalar_p2(constant uint*constantuintp,\n"
+    "                              constant unsigned int *constantunsignedintp,\n"
+    "                              constant long* constantlongp,\n"
+    "                              constant ulong * constantulongp)\n"
+  "{}\n",
+    "kernel void constant_scalar_p3(constant unsigned long*constantunsignedlongp,\n"
+    "                              constant float *constantfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_scalar_restrict_p0(constant void* restrict constantvoidrestrictp,\n"
+    "                                       constant char * restrict constantcharrestrictp,\n"
+    "                                       constant uchar*restrict constantucharrestrictp,\n"
+    "                                       constant unsigned char *restrict constantunsignedcharrestrictp)\n"
+    "{}\n",
+    "kernel void constant_scalar_restrict_p1(constant short* restrict constantshortrestrictp,\n"
+    "                                       constant ushort * restrict constantushortrestrictp,\n"
+    "                                       constant unsigned short*restrict constantunsignedshortrestrictp,\n"
+    "                                       constant int *restrict constantintrestrictp)\n"
+    "{}\n",
+    "kernel void constant_scalar_restrict_p2(constant uint* restrict constantuintrestrictp,\n"
+    "                                       constant unsigned int * restrict constantunsignedintrestrictp,\n"
+    "                                       constant long*restrict constantlongrestrictp,\n"
+    "                                       constant ulong *restrict constantulongrestrictp)\n"
+    "{}\n",
+    "kernel void constant_scalar_restrict_p3(constant unsigned long* restrict constantunsignedlongrestrictp,\n"
+    "                                       constant float * restrict constantfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_scalar_p(global void*globalvoidp,\n"
+    "                            global char *globalcharp,\n"
+    "                            global uchar* globalucharp,\n"
+    "                            global unsigned char * globalunsignedcharp,\n"
+    "                            global short*globalshortp,\n"
+    "                            global ushort *globalushortp,\n"
+    "                            global unsigned short* globalunsignedshortp,\n"
+    "                            global int * globalintp,\n"
+    "                            global uint*globaluintp,\n"
+    "                            global unsigned int *globalunsignedintp,\n"
+    "                            global long* globallongp,\n"
+    "                            global ulong * globalulongp,\n"
+    "                            global unsigned long*globalunsignedlongp,\n"
+    "                            global float *globalfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_scalar_restrict_p(global void* restrict globalvoidrestrictp,\n"
+    "                                     global char * restrict globalcharrestrictp,\n"
+    "                                     global uchar*restrict globalucharrestrictp,\n"
+    "                                     global unsigned char *restrict globalunsignedcharrestrictp,\n"
+    "                                     global short* restrict globalshortrestrictp,\n"
+    "                                     global ushort * restrict globalushortrestrictp,\n"
+    "                                     global unsigned short*restrict globalunsignedshortrestrictp,\n"
+    "                                     global int *restrict globalintrestrictp,\n"
+    "                                     global uint* restrict globaluintrestrictp,\n"
+    "                                     global unsigned int * restrict globalunsignedintrestrictp,\n"
+    "                                     global long*restrict globallongrestrictp,\n"
+    "                                     global ulong *restrict globalulongrestrictp,\n"
+    "                                     global unsigned long* restrict globalunsignedlongrestrictp,\n"
+    "                                     global float * restrict globalfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_scalar_p(global const void*globalconstvoidp,\n"
+    "                                  global const char *globalconstcharp,\n"
+    "                                  global const uchar* globalconstucharp,\n"
+    "                                  global const unsigned char * globalconstunsignedcharp,\n"
+    "                                  global const short*globalconstshortp,\n"
+    "                                  global const ushort *globalconstushortp,\n"
+    "                                  global const unsigned short* globalconstunsignedshortp,\n"
+    "                                  global const int * globalconstintp,\n"
+    "                                  global const uint*globalconstuintp,\n"
+    "                                  global const unsigned int *globalconstunsignedintp,\n"
+    "                                  global const long* globalconstlongp,\n"
+    "                                  global const ulong * globalconstulongp,\n"
+    "                                  global const unsigned long*globalconstunsignedlongp,\n"
+    "                                  global const float *globalconstfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_scalar_restrict_p(global const void* restrict globalconstvoidrestrictp,\n"
+    "                                           global const char * restrict globalconstcharrestrictp,\n"
+    "                                           global const uchar*restrict globalconstucharrestrictp,\n"
+    "                                           global const unsigned char *restrict globalconstunsignedcharrestrictp,\n"
+    "                                           global const short* restrict globalconstshortrestrictp,\n"
+    "                                           global const ushort * restrict globalconstushortrestrictp,\n"
+    "                                           global const unsigned short*restrict globalconstunsignedshortrestrictp,\n"
+    "                                           global const int *restrict globalconstintrestrictp,\n"
+    "                                           global const uint* restrict globalconstuintrestrictp,\n"
+    "                                           global const unsigned int * restrict globalconstunsignedintrestrictp,\n"
+    "                                           global const long*restrict globalconstlongrestrictp,\n"
+    "                                           global const ulong *restrict globalconstulongrestrictp,\n"
+    "                                           global const unsigned long* restrict globalconstunsignedlongrestrictp,\n"
+    "                                           global const float * restrict globalconstfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_scalar_p(global volatile void*globalvolatilevoidp,\n"
+    "                                     global volatile char *globalvolatilecharp,\n"
+    "                                     global volatile uchar* globalvolatileucharp,\n"
+    "                                     global volatile unsigned char * globalvolatileunsignedcharp,\n"
+    "                                     global volatile short*globalvolatileshortp,\n"
+    "                                     global volatile ushort *globalvolatileushortp,\n"
+    "                                     global volatile unsigned short* globalvolatileunsignedshortp,\n"
+    "                                     global volatile int * globalvolatileintp,\n"
+    "                                     global volatile uint*globalvolatileuintp,\n"
+    "                                     global volatile unsigned int *globalvolatileunsignedintp,\n"
+    "                                     global volatile long* globalvolatilelongp,\n"
+    "                                     global volatile ulong * globalvolatileulongp,\n"
+    "                                     global volatile unsigned long*globalvolatileunsignedlongp,\n"
+    "                                     global volatile float *globalvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_scalar_restrict_p(global volatile void* restrict globalvolatilevoidrestrictp,\n"
+    "                                              global volatile char * restrict globalvolatilecharrestrictp,\n"
+    "                                              global volatile uchar*restrict globalvolatileucharrestrictp,\n"
+    "                                              global volatile unsigned char *restrict globalvolatileunsignedcharrestrictp,\n"
+    "                                              global volatile short* restrict globalvolatileshortrestrictp,\n"
+    "                                              global volatile ushort * restrict globalvolatileushortrestrictp,\n"
+    "                                              global volatile unsigned short*restrict globalvolatileunsignedshortrestrictp,\n"
+    "                                              global volatile int *restrict globalvolatileintrestrictp,\n"
+    "                                              global volatile uint* restrict globalvolatileuintrestrictp,\n"
+    "                                              global volatile unsigned int * restrict globalvolatileunsignedintrestrictp,\n"
+    "                                              global volatile long*restrict globalvolatilelongrestrictp,\n"
+    "                                              global volatile ulong *restrict globalvolatileulongrestrictp,\n"
+    "                                              global volatile unsigned long* restrict globalvolatileunsignedlongrestrictp,\n"
+    "                                              global volatile float * restrict globalvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_scalar_p(global const volatile void*globalconstvolatilevoidp,\n"
+    "                                           global const volatile char *globalconstvolatilecharp,\n"
+    "                                           global const volatile uchar* globalconstvolatileucharp,\n"
+    "                                           global const volatile unsigned char * globalconstvolatileunsignedcharp,\n"
+    "                                           global const volatile short*globalconstvolatileshortp,\n"
+    "                                           global const volatile ushort *globalconstvolatileushortp,\n"
+    "                                           global const volatile unsigned short* globalconstvolatileunsignedshortp,\n"
+    "                                           global const volatile int * globalconstvolatileintp,\n"
+    "                                           global const volatile uint*globalconstvolatileuintp,\n"
+    "                                           global const volatile unsigned int *globalconstvolatileunsignedintp,\n"
+    "                                           global const volatile long* globalconstvolatilelongp,\n"
+    "                                           global const volatile ulong * globalconstvolatileulongp,\n"
+    "                                           global const volatile unsigned long*globalconstvolatileunsignedlongp,\n"
+    "                                           global const volatile float *globalconstvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_scalar_restrict_p(global const volatile void* restrict globalconstvolatilevoidrestrictp,\n"
+    "                                                    global const volatile char * restrict globalconstvolatilecharrestrictp,\n"
+    "                                                    global const volatile uchar*restrict globalconstvolatileucharrestrictp,\n"
+    "                                                    global const volatile unsigned char *restrict globalconstvolatileunsignedcharrestrictp,\n"
+    "                                                    global const volatile short* restrict globalconstvolatileshortrestrictp,\n"
+    "                                                    global const volatile ushort * restrict globalconstvolatileushortrestrictp,\n"
+    "                                                    global const volatile unsigned short*restrict globalconstvolatileunsignedshortrestrictp,\n"
+    "                                                    global const volatile int *restrict globalconstvolatileintrestrictp,\n"
+    "                                                    global const volatile uint* restrict globalconstvolatileuintrestrictp,\n"
+    "                                                    global const volatile unsigned int * restrict globalconstvolatileunsignedintrestrictp,\n"
+    "                                                    global const volatile long*restrict globalconstvolatilelongrestrictp,\n"
+    "                                                    global const volatile ulong *restrict globalconstvolatileulongrestrictp,\n"
+    "                                                    global const volatile unsigned long* restrict globalconstvolatileunsignedlongrestrictp,\n"
+    "                                                    global const volatile float * restrict globalconstvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_scalar_p(local void*localvoidp,\n"
+    "                           local char *localcharp,\n"
+    "                           local uchar* localucharp,\n"
+    "                           local unsigned char * localunsignedcharp,\n"
+    "                           local short*localshortp,\n"
+    "                           local ushort *localushortp,\n"
+    "                           local unsigned short* localunsignedshortp,\n"
+    "                           local int * localintp,\n"
+    "                           local uint*localuintp,\n"
+    "                           local unsigned int *localunsignedintp,\n"
+    "                           local long* locallongp,\n"
+    "                           local ulong * localulongp,\n"
+    "                           local unsigned long*localunsignedlongp,\n"
+    "                           local float *localfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_scalar_restrict_p(local void* restrict localvoidrestrictp,\n"
+    "                                    local char * restrict localcharrestrictp,\n"
+    "                                    local uchar*restrict localucharrestrictp,\n"
+    "                                    local unsigned char *restrict localunsignedcharrestrictp,\n"
+    "                                    local short* restrict localshortrestrictp,\n"
+    "                                    local ushort * restrict localushortrestrictp,\n"
+    "                                    local unsigned short*restrict localunsignedshortrestrictp,\n"
+    "                                    local int *restrict localintrestrictp,\n"
+    "                                    local uint* restrict localuintrestrictp,\n"
+    "                                    local unsigned int * restrict localunsignedintrestrictp,\n"
+    "                                    local long*restrict locallongrestrictp,\n"
+    "                                    local ulong *restrict localulongrestrictp,\n"
+    "                                    local unsigned long* restrict localunsignedlongrestrictp,\n"
+    "                                    local float * restrict localfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_scalar_p(local const void*localconstvoidp,\n"
+    "                                 local const char *localconstcharp,\n"
+    "                                 local const uchar* localconstucharp,\n"
+    "                                 local const unsigned char * localconstunsignedcharp,\n"
+    "                                 local const short*localconstshortp,\n"
+    "                                 local const ushort *localconstushortp,\n"
+    "                                 local const unsigned short* localconstunsignedshortp,\n"
+    "                                 local const int * localconstintp,\n"
+    "                                 local const uint*localconstuintp,\n"
+    "                                 local const unsigned int *localconstunsignedintp,\n"
+    "                                 local const long* localconstlongp,\n"
+    "                                 local const ulong * localconstulongp,\n"
+    "                                 local const unsigned long*localconstunsignedlongp,\n"
+    "                                 local const float *localconstfloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_scalar_restrict_p(local const void* restrict localconstvoidrestrictp,\n"
+    "                                          local const char * restrict localconstcharrestrictp,\n"
+    "                                          local const uchar*restrict localconstucharrestrictp,\n"
+    "                                          local const unsigned char *restrict localconstunsignedcharrestrictp,\n"
+    "                                          local const short* restrict localconstshortrestrictp,\n"
+    "                                          local const ushort * restrict localconstushortrestrictp,\n"
+    "                                          local const unsigned short*restrict localconstunsignedshortrestrictp,\n"
+    "                                          local const int *restrict localconstintrestrictp,\n"
+    "                                          local const uint* restrict localconstuintrestrictp,\n"
+    "                                          local const unsigned int * restrict localconstunsignedintrestrictp,\n"
+    "                                          local const long*restrict localconstlongrestrictp,\n"
+    "                                          local const ulong *restrict localconstulongrestrictp,\n"
+    "                                          local const unsigned long* restrict localconstunsignedlongrestrictp,\n"
+    "                                          local const float * restrict localconstfloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_scalar_p(local volatile void*localvolatilevoidp,\n"
+    "                                    local volatile char *localvolatilecharp,\n"
+    "                                    local volatile uchar* localvolatileucharp,\n"
+    "                                    local volatile unsigned char * localvolatileunsignedcharp,\n"
+    "                                    local volatile short*localvolatileshortp,\n"
+    "                                    local volatile ushort *localvolatileushortp,\n"
+    "                                    local volatile unsigned short* localvolatileunsignedshortp,\n"
+    "                                    local volatile int * localvolatileintp,\n"
+    "                                    local volatile uint*localvolatileuintp,\n"
+    "                                    local volatile unsigned int *localvolatileunsignedintp,\n"
+    "                                    local volatile long* localvolatilelongp,\n"
+    "                                    local volatile ulong * localvolatileulongp,\n"
+    "                                    local volatile unsigned long*localvolatileunsignedlongp,\n"
+    "                                    local volatile float *localvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_scalar_restrict_p(local volatile void* restrict localvolatilevoidrestrictp,\n"
+    "                                             local volatile char * restrict localvolatilecharrestrictp,\n"
+    "                                             local volatile uchar*restrict localvolatileucharrestrictp,\n"
+    "                                             local volatile unsigned char *restrict localvolatileunsignedcharrestrictp,\n"
+    "                                             local volatile short* restrict localvolatileshortrestrictp,\n"
+    "                                             local volatile ushort * restrict localvolatileushortrestrictp,\n"
+    "                                             local volatile unsigned short*restrict localvolatileunsignedshortrestrictp,\n"
+    "                                             local volatile int *restrict localvolatileintrestrictp,\n"
+    "                                             local volatile uint* restrict localvolatileuintrestrictp,\n"
+    "                                             local volatile unsigned int * restrict localvolatileunsignedintrestrictp,\n"
+    "                                             local volatile long*restrict localvolatilelongrestrictp,\n"
+    "                                             local volatile ulong *restrict localvolatileulongrestrictp,\n"
+    "                                             local volatile unsigned long* restrict localvolatileunsignedlongrestrictp,\n"
+    "                                             local volatile float * restrict localvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_scalar_p(local const volatile void*localconstvolatilevoidp,\n"
+    "                                          local const volatile char *localconstvolatilecharp,\n"
+    "                                          local const volatile uchar* localconstvolatileucharp,\n"
+    "                                          local const volatile unsigned char * localconstvolatileunsignedcharp,\n"
+    "                                          local const volatile short*localconstvolatileshortp,\n"
+    "                                          local const volatile ushort *localconstvolatileushortp,\n"
+    "                                          local const volatile unsigned short* localconstvolatileunsignedshortp,\n"
+    "                                          local const volatile int * localconstvolatileintp,\n"
+    "                                          local const volatile uint*localconstvolatileuintp,\n"
+    "                                          local const volatile unsigned int *localconstvolatileunsignedintp,\n"
+    "                                          local const volatile long* localconstvolatilelongp,\n"
+    "                                          local const volatile ulong * localconstvolatileulongp,\n"
+    "                                          local const volatile unsigned long*localconstvolatileunsignedlongp,\n"
+    "                                          local const volatile float *localconstvolatilefloatp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_scalar_restrict_p(local const volatile void* restrict localconstvolatilevoidrestrictp,\n"
+    "                                                   local const volatile char * restrict localconstvolatilecharrestrictp,\n"
+    "                                                   local const volatile uchar*restrict localconstvolatileucharrestrictp,\n"
+    "                                                   local const volatile unsigned char *restrict localconstvolatileunsignedcharrestrictp,\n"
+    "                                                   local const volatile short* restrict localconstvolatileshortrestrictp,\n"
+    "                                                   local const volatile ushort * restrict localconstvolatileushortrestrictp,\n"
+    "                                                   local const volatile unsigned short*restrict localconstvolatileunsignedshortrestrictp,\n"
+    "                                                   local const volatile int *restrict localconstvolatileintrestrictp,\n"
+    "                                                   local const volatile uint* restrict localconstvolatileuintrestrictp,\n"
+    "                                                   local const volatile unsigned int * restrict localconstvolatileunsignedintrestrictp,\n"
+    "                                                   local const volatile long*restrict localconstvolatilelongrestrictp,\n"
+    "                                                   local const volatile ulong *restrict localconstvolatileulongrestrictp,\n"
+    "                                                   local const volatile unsigned long* restrict localconstvolatileunsignedlongrestrictp,\n"
+    "                                                   local const volatile float * restrict localconstvolatilefloatrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void scalar_d(char chard,\n"
+    "                     uchar uchard,\n"
+    "                     unsigned char unsignedchard,\n"
+    "                     short shortd,\n"
+    "                     ushort ushortd,\n"
+    "                     unsigned short unsignedshortd,\n"
+    "                     int intd,\n"
+    "                     uint uintd,\n"
+    "                     unsigned int unsignedintd,\n"
+    "                     long longd,\n"
+    "                     ulong ulongd,\n"
+    "                     unsigned long unsignedlongd,\n"
+    "                     float floatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_scalar_d(const char constchard,\n"
+    "                           const uchar constuchard,\n"
+    "                           const unsigned char constunsignedchard,\n"
+    "                           const short constshortd,\n"
+    "                           const ushort constushortd,\n"
+    "                           const unsigned short constunsignedshortd,\n"
+    "                           const int constintd,\n"
+    "                           const uint constuintd,\n"
+    "                           const unsigned int constunsignedintd,\n"
+    "                           const long constlongd,\n"
+    "                           const ulong constulongd,\n"
+    "                           const unsigned long constunsignedlongd,\n"
+    "                           const float constfloatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_scalar_d(private char privatechard,\n"
+    "                             private uchar privateuchard,\n"
+    "                             private unsigned char privateunsignedchard,\n"
+    "                             private short privateshortd,\n"
+    "                             private ushort privateushortd,\n"
+    "                             private unsigned short privateunsignedshortd,\n"
+    "                             private int privateintd,\n"
+    "                             private uint privateuintd,\n"
+    "                             private unsigned int privateunsignedintd,\n"
+    "                             private long privatelongd,\n"
+    "                             private ulong privateulongd,\n"
+    "                             private unsigned long privateunsignedlongd,\n"
+    "                             private float privatefloatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_scalar_d(private const char privateconstchard,\n"
+    "                                   private const uchar privateconstuchard,\n"
+    "                                   private const unsigned char privateconstunsignedchard,\n"
+    "                                   private const short privateconstshortd,\n"
+    "                                   private const ushort privateconstushortd,\n"
+    "                                   private const unsigned short privateconstunsignedshortd,\n"
+    "                                   private const int privateconstintd,\n"
+    "                                   private const uint privateconstuintd,\n"
+    "                                   private const unsigned int privateconstunsignedintd,\n"
+    "                                   private const long privateconstlongd,\n"
+    "                                   private const ulong privateconstulongd,\n"
+    "                                   private const unsigned long privateconstunsignedlongd,\n"
+    "                                   private const float privateconstfloatd)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector2_p0(constant char2*constantchar2p,\n"
+    "                               constant uchar2 *constantuchar2p,\n"
+    "                               constant short2* constantshort2p,\n"
+    "                               constant ushort2 * constantushort2p)\n"
+  "{}\n",
+    "\n"
+    "kernel void constant_vector2_p1(constant int2*constantint2p,\n"
+    "                               constant uint2 *constantuint2p,\n"
+    "                               constant long2* constantlong2p,\n"
+    "                               constant ulong2 * constantulong2p)\n"
+  "{}\n",
+    "\n"
+    "kernel void constant_vector2_p2(constant float2*constantfloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector2_restrict_p0(constant char2 *restrict constantchar2restrictp,\n"
+    "                                        constant uchar2* restrict constantuchar2restrictp,\n"
+    "                                        constant short2 * restrict constantshort2restrictp,\n"
+    "                                        constant ushort2*restrict constantushort2restrictp)\n"
+  "{}\n",
+    "\n"
+    "kernel void constant_vector2_restrict_p1(constant int2 *restrict constantint2restrictp,\n"
+    "                                        constant uint2* restrict constantuint2restrictp,\n"
+    "                                        constant long2 * restrict constantlong2restrictp,\n"
+    "                                        constant ulong2*restrict constantulong2restrictp)\n"
+  "{}\n",
+    "\n"
+    "kernel void constant_vector2_restrict_p2(constant float2 *restrict constantfloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector2_p(global char2*globalchar2p,\n"
+    "                             global uchar2 *globaluchar2p,\n"
+    "                             global short2* globalshort2p,\n"
+    "                             global ushort2 * globalushort2p,\n"
+    "                             global int2*globalint2p,\n"
+    "                             global uint2 *globaluint2p,\n"
+    "                             global long2* globallong2p,\n"
+    "                             global ulong2 * globalulong2p,\n"
+    "                             global float2*globalfloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector2_restrict_p(global char2 *restrict globalchar2restrictp,\n"
+    "                                      global uchar2* restrict globaluchar2restrictp,\n"
+    "                                      global short2 * restrict globalshort2restrictp,\n"
+    "                                      global ushort2*restrict globalushort2restrictp,\n"
+    "                                      global int2 *restrict globalint2restrictp,\n"
+    "                                      global uint2* restrict globaluint2restrictp,\n"
+    "                                      global long2 * restrict globallong2restrictp,\n"
+    "                                      global ulong2*restrict globalulong2restrictp,\n"
+    "                                      global float2 *restrict globalfloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector2_p(global const char2* globalconstchar2p,\n"
+    "                                   global const uchar2 * globalconstuchar2p,\n"
+    "                                   global const short2*globalconstshort2p,\n"
+    "                                   global const ushort2 *globalconstushort2p,\n"
+    "                                   global const int2* globalconstint2p,\n"
+    "                                   global const uint2 * globalconstuint2p,\n"
+    "                                   global const long2*globalconstlong2p,\n"
+    "                                   global const ulong2 *globalconstulong2p,\n"
+    "                                   global const float2* globalconstfloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector2_restrict_p(global const char2 * restrict globalconstchar2restrictp,\n"
+    "                                            global const uchar2*restrict globalconstuchar2restrictp,\n"
+    "                                            global const short2 *restrict globalconstshort2restrictp,\n"
+    "                                            global const ushort2* restrict globalconstushort2restrictp,\n"
+    "                                            global const int2 * restrict globalconstint2restrictp,\n"
+    "                                            global const uint2*restrict globalconstuint2restrictp,\n"
+    "                                            global const long2 *restrict globalconstlong2restrictp,\n"
+    "                                            global const ulong2* restrict globalconstulong2restrictp,\n"
+    "                                            global const float2 * restrict globalconstfloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector2_p(global volatile char2*globalvolatilechar2p,\n"
+    "                                      global volatile uchar2 *globalvolatileuchar2p,\n"
+    "                                      global volatile short2* globalvolatileshort2p,\n"
+    "                                      global volatile ushort2 * globalvolatileushort2p,\n"
+    "                                      global volatile int2*globalvolatileint2p,\n"
+    "                                      global volatile uint2 *globalvolatileuint2p,\n"
+    "                                      global volatile long2* globalvolatilelong2p,\n"
+    "                                      global volatile ulong2 * globalvolatileulong2p,\n"
+    "                                      global volatile float2*globalvolatilefloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector2_restrict_p(global volatile char2 *restrict globalvolatilechar2restrictp,\n"
+    "                                               global volatile uchar2* restrict globalvolatileuchar2restrictp,\n"
+    "                                               global volatile short2 * restrict globalvolatileshort2restrictp,\n"
+    "                                               global volatile ushort2*restrict globalvolatileushort2restrictp,\n"
+    "                                               global volatile int2 *restrict globalvolatileint2restrictp,\n"
+    "                                               global volatile uint2* restrict globalvolatileuint2restrictp,\n"
+    "                                               global volatile long2 * restrict globalvolatilelong2restrictp,\n"
+    "                                               global volatile ulong2*restrict globalvolatileulong2restrictp,\n"
+    "                                               global volatile float2 *restrict globalvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector2_p(global const volatile char2* globalconstvolatilechar2p,\n"
+    "                                            global const volatile uchar2 * globalconstvolatileuchar2p,\n"
+    "                                            global const volatile short2*globalconstvolatileshort2p,\n"
+    "                                            global const volatile ushort2 *globalconstvolatileushort2p,\n"
+    "                                            global const volatile int2* globalconstvolatileint2p,\n"
+    "                                            global const volatile uint2 * globalconstvolatileuint2p,\n"
+    "                                            global const volatile long2*globalconstvolatilelong2p,\n"
+    "                                            global const volatile ulong2 *globalconstvolatileulong2p,\n"
+    "                                            global const volatile float2* globalconstvolatilefloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector2_restrict_p(global const volatile char2 * restrict globalconstvolatilechar2restrictp,\n"
+    "                                                     global const volatile uchar2*restrict globalconstvolatileuchar2restrictp,\n"
+    "                                                     global const volatile short2 *restrict globalconstvolatileshort2restrictp,\n"
+    "                                                     global const volatile ushort2* restrict globalconstvolatileushort2restrictp,\n"
+    "                                                     global const volatile int2 * restrict globalconstvolatileint2restrictp,\n"
+    "                                                     global const volatile uint2*restrict globalconstvolatileuint2restrictp,\n"
+    "                                                     global const volatile long2 *restrict globalconstvolatilelong2restrictp,\n"
+    "                                                     global const volatile ulong2* restrict globalconstvolatileulong2restrictp,\n"
+    "                                                     global const volatile float2 * restrict globalconstvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector2_p(local char2*localchar2p,\n"
+    "                            local uchar2 *localuchar2p,\n"
+    "                            local short2* localshort2p,\n"
+    "                            local ushort2 * localushort2p,\n"
+    "                            local int2*localint2p,\n"
+    "                            local uint2 *localuint2p,\n"
+    "                            local long2* locallong2p,\n"
+    "                            local ulong2 * localulong2p,\n"
+    "                            local float2*localfloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector2_restrict_p(local char2 *restrict localchar2restrictp,\n"
+    "                                     local uchar2* restrict localuchar2restrictp,\n"
+    "                                     local short2 * restrict localshort2restrictp,\n"
+    "                                     local ushort2*restrict localushort2restrictp,\n"
+    "                                     local int2 *restrict localint2restrictp,\n"
+    "                                     local uint2* restrict localuint2restrictp,\n"
+    "                                     local long2 * restrict locallong2restrictp,\n"
+    "                                     local ulong2*restrict localulong2restrictp,\n"
+    "                                     local float2 *restrict localfloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector2_p(local const char2* localconstchar2p,\n"
+    "                                  local const uchar2 * localconstuchar2p,\n"
+    "                                  local const short2*localconstshort2p,\n"
+    "                                  local const ushort2 *localconstushort2p,\n"
+    "                                  local const int2* localconstint2p,\n"
+    "                                  local const uint2 * localconstuint2p,\n"
+    "                                  local const long2*localconstlong2p,\n"
+    "                                  local const ulong2 *localconstulong2p,\n"
+    "                                  local const float2* localconstfloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector2_restrict_p(local const char2 * restrict localconstchar2restrictp,\n"
+    "                                           local const uchar2*restrict localconstuchar2restrictp,\n"
+    "                                           local const short2 *restrict localconstshort2restrictp,\n"
+    "                                           local const ushort2* restrict localconstushort2restrictp,\n"
+    "                                           local const int2 * restrict localconstint2restrictp,\n"
+    "                                           local const uint2*restrict localconstuint2restrictp,\n"
+    "                                           local const long2 *restrict localconstlong2restrictp,\n"
+    "                                           local const ulong2* restrict localconstulong2restrictp,\n"
+    "                                           local const float2 * restrict localconstfloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector2_p(local volatile char2*localvolatilechar2p,\n"
+    "                                     local volatile uchar2 *localvolatileuchar2p,\n"
+    "                                     local volatile short2* localvolatileshort2p,\n"
+    "                                     local volatile ushort2 * localvolatileushort2p,\n"
+    "                                     local volatile int2*localvolatileint2p,\n"
+    "                                     local volatile uint2 *localvolatileuint2p,\n"
+    "                                     local volatile long2* localvolatilelong2p,\n"
+    "                                     local volatile ulong2 * localvolatileulong2p,\n"
+    "                                     local volatile float2*localvolatilefloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector2_restrict_p(local volatile char2 *restrict localvolatilechar2restrictp,\n"
+    "                                              local volatile uchar2* restrict localvolatileuchar2restrictp,\n"
+    "                                              local volatile short2 * restrict localvolatileshort2restrictp,\n"
+    "                                              local volatile ushort2*restrict localvolatileushort2restrictp,\n"
+    "                                              local volatile int2 *restrict localvolatileint2restrictp,\n"
+    "                                              local volatile uint2* restrict localvolatileuint2restrictp,\n"
+    "                                              local volatile long2 * restrict localvolatilelong2restrictp,\n"
+    "                                              local volatile ulong2*restrict localvolatileulong2restrictp,\n"
+    "                                              local volatile float2 *restrict localvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector2_p(local const volatile char2* localconstvolatilechar2p,\n"
+    "                                           local const volatile uchar2 * localconstvolatileuchar2p,\n"
+    "                                           local const volatile short2*localconstvolatileshort2p,\n"
+    "                                           local const volatile ushort2 *localconstvolatileushort2p,\n"
+    "                                           local const volatile int2* localconstvolatileint2p,\n"
+    "                                           local const volatile uint2 * localconstvolatileuint2p,\n"
+    "                                           local const volatile long2*localconstvolatilelong2p,\n"
+    "                                           local const volatile ulong2 *localconstvolatileulong2p,\n"
+    "                                           local const volatile float2* localconstvolatilefloat2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector2_restrict_p(local const volatile char2 * restrict localconstvolatilechar2restrictp,\n"
+    "                                                    local const volatile uchar2*restrict localconstvolatileuchar2restrictp,\n"
+    "                                                    local const volatile short2 *restrict localconstvolatileshort2restrictp,\n"
+    "                                                    local const volatile ushort2* restrict localconstvolatileushort2restrictp,\n"
+    "                                                    local const volatile int2 * restrict localconstvolatileint2restrictp,\n"
+    "                                                    local const volatile uint2*restrict localconstvolatileuint2restrictp,\n"
+    "                                                    local const volatile long2 *restrict localconstvolatilelong2restrictp,\n"
+    "                                                    local const volatile ulong2* restrict localconstvolatileulong2restrictp,\n"
+    "                                                    local const volatile float2 * restrict localconstvolatilefloat2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector2_d(char2 char2d,\n"
+    "                      uchar2 uchar2d,\n"
+    "                      short2 short2d,\n"
+    "                      ushort2 ushort2d,\n"
+    "                      int2 int2d,\n"
+    "                      uint2 uint2d,\n"
+    "                      long2 long2d,\n"
+    "                      ulong2 ulong2d,\n"
+    "                      float2 float2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector2_d(const char2 constchar2d,\n"
+    "                            const uchar2 constuchar2d,\n"
+    "                            const short2 constshort2d,\n"
+    "                            const ushort2 constushort2d,\n"
+    "                            const int2 constint2d,\n"
+    "                            const uint2 constuint2d,\n"
+    "                            const long2 constlong2d,\n"
+    "                            const ulong2 constulong2d,\n"
+    "                            const float2 constfloat2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector2_d(private char2 privatechar2d,\n"
+    "                              private uchar2 privateuchar2d,\n"
+    "                              private short2 privateshort2d,\n"
+    "                              private ushort2 privateushort2d,\n"
+    "                              private int2 privateint2d,\n"
+    "                              private uint2 privateuint2d,\n"
+    "                              private long2 privatelong2d,\n"
+    "                              private ulong2 privateulong2d,\n"
+    "                              private float2 privatefloat2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector2_d(private const char2 privateconstchar2d,\n"
+    "                                    private const uchar2 privateconstuchar2d,\n"
+    "                                    private const short2 privateconstshort2d,\n"
+    "                                    private const ushort2 privateconstushort2d,\n"
+    "                                    private const int2 privateconstint2d,\n"
+    "                                    private const uint2 privateconstuint2d,\n"
+    "                                    private const long2 privateconstlong2d,\n"
+    "                                    private const ulong2 privateconstulong2d,\n"
+    "                                    private const float2 privateconstfloat2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_p0(constant char3*constantchar3p,\n"
+    "                               constant uchar3 *constantuchar3p,\n"
+    "                               constant short3* constantshort3p,\n"
+    "                               constant ushort3 * constantushort3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_p1(constant int3*constantint3p,\n"
+    "                               constant uint3 *constantuint3p,\n"
+    "                               constant long3* constantlong3p,\n"
+    "                               constant ulong3 * constantulong3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_p2(constant float3*constantfloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_restrict_p0(constant char3 *restrict constantchar3restrictp,\n"
+    "                                        constant uchar3* restrict constantuchar3restrictp,\n"
+    "                                        constant short3 * restrict constantshort3restrictp,\n"
+    "                                        constant ushort3*restrict constantushort3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_restrict_p1(constant int3 *restrict constantint3restrictp,\n"
+    "                                        constant uint3* restrict constantuint3restrictp,\n"
+    "                                        constant long3 * restrict constantlong3restrictp,\n"
+    "                                        constant ulong3*restrict constantulong3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector3_restrict_p2(constant float3 *restrict constantfloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector3_p(global char3*globalchar3p,\n"
+    "                             global uchar3 *globaluchar3p,\n"
+    "                             global short3* globalshort3p,\n"
+    "                             global ushort3 * globalushort3p,\n"
+    "                             global int3*globalint3p,\n"
+    "                             global uint3 *globaluint3p,\n"
+    "                             global long3* globallong3p,\n"
+    "                             global ulong3 * globalulong3p,\n"
+    "                             global float3*globalfloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector3_restrict_p(global char3 *restrict globalchar3restrictp,\n"
+    "                                      global uchar3* restrict globaluchar3restrictp,\n"
+    "                                      global short3 * restrict globalshort3restrictp,\n"
+    "                                      global ushort3*restrict globalushort3restrictp,\n"
+    "                                      global int3 *restrict globalint3restrictp,\n"
+    "                                      global uint3* restrict globaluint3restrictp,\n"
+    "                                      global long3 * restrict globallong3restrictp,\n"
+    "                                      global ulong3*restrict globalulong3restrictp,\n"
+    "                                      global float3 *restrict globalfloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector3_p(global const char3* globalconstchar3p,\n"
+    "                                   global const uchar3 * globalconstuchar3p,\n"
+    "                                   global const short3*globalconstshort3p,\n"
+    "                                   global const ushort3 *globalconstushort3p,\n"
+    "                                   global const int3* globalconstint3p,\n"
+    "                                   global const uint3 * globalconstuint3p,\n"
+    "                                   global const long3*globalconstlong3p,\n"
+    "                                   global const ulong3 *globalconstulong3p,\n"
+    "                                   global const float3* globalconstfloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector3_restrict_p(global const char3 * restrict globalconstchar3restrictp,\n"
+    "                                            global const uchar3*restrict globalconstuchar3restrictp,\n"
+    "                                            global const short3 *restrict globalconstshort3restrictp,\n"
+    "                                            global const ushort3* restrict globalconstushort3restrictp,\n"
+    "                                            global const int3 * restrict globalconstint3restrictp,\n"
+    "                                            global const uint3*restrict globalconstuint3restrictp,\n"
+    "                                            global const long3 *restrict globalconstlong3restrictp,\n"
+    "                                            global const ulong3* restrict globalconstulong3restrictp,\n"
+    "                                            global const float3 * restrict globalconstfloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector3_p(global volatile char3*globalvolatilechar3p,\n"
+    "                                      global volatile uchar3 *globalvolatileuchar3p,\n"
+    "                                      global volatile short3* globalvolatileshort3p,\n"
+    "                                      global volatile ushort3 * globalvolatileushort3p,\n"
+    "                                      global volatile int3*globalvolatileint3p,\n"
+    "                                      global volatile uint3 *globalvolatileuint3p,\n"
+    "                                      global volatile long3* globalvolatilelong3p,\n"
+    "                                      global volatile ulong3 * globalvolatileulong3p,\n"
+    "                                      global volatile float3*globalvolatilefloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector3_restrict_p(global volatile char3 *restrict globalvolatilechar3restrictp,\n"
+    "                                               global volatile uchar3* restrict globalvolatileuchar3restrictp,\n"
+    "                                               global volatile short3 * restrict globalvolatileshort3restrictp,\n"
+    "                                               global volatile ushort3*restrict globalvolatileushort3restrictp,\n"
+    "                                               global volatile int3 *restrict globalvolatileint3restrictp,\n"
+    "                                               global volatile uint3* restrict globalvolatileuint3restrictp,\n"
+    "                                               global volatile long3 * restrict globalvolatilelong3restrictp,\n"
+    "                                               global volatile ulong3*restrict globalvolatileulong3restrictp,\n"
+    "                                               global volatile float3 *restrict globalvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector3_p(global const volatile char3* globalconstvolatilechar3p,\n"
+    "                                            global const volatile uchar3 * globalconstvolatileuchar3p,\n"
+    "                                            global const volatile short3*globalconstvolatileshort3p,\n"
+    "                                            global const volatile ushort3 *globalconstvolatileushort3p,\n"
+    "                                            global const volatile int3* globalconstvolatileint3p,\n"
+    "                                            global const volatile uint3 * globalconstvolatileuint3p,\n"
+    "                                            global const volatile long3*globalconstvolatilelong3p,\n"
+    "                                            global const volatile ulong3 *globalconstvolatileulong3p,\n"
+    "                                            global const volatile float3* globalconstvolatilefloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector3_restrict_p(global const volatile char3 * restrict globalconstvolatilechar3restrictp,\n"
+    "                                                     global const volatile uchar3*restrict globalconstvolatileuchar3restrictp,\n"
+    "                                                     global const volatile short3 *restrict globalconstvolatileshort3restrictp,\n"
+    "                                                     global const volatile ushort3* restrict globalconstvolatileushort3restrictp,\n"
+    "                                                     global const volatile int3 * restrict globalconstvolatileint3restrictp,\n"
+    "                                                     global const volatile uint3*restrict globalconstvolatileuint3restrictp,\n"
+    "                                                     global const volatile long3 *restrict globalconstvolatilelong3restrictp,\n"
+    "                                                     global const volatile ulong3* restrict globalconstvolatileulong3restrictp,\n"
+    "                                                     global const volatile float3 * restrict globalconstvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector3_p(local char3*localchar3p,\n"
+    "                            local uchar3 *localuchar3p,\n"
+    "                            local short3* localshort3p,\n"
+    "                            local ushort3 * localushort3p,\n"
+    "                            local int3*localint3p,\n"
+    "                            local uint3 *localuint3p,\n"
+    "                            local long3* locallong3p,\n"
+    "                            local ulong3 * localulong3p,\n"
+    "                            local float3*localfloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector3_restrict_p(local char3 *restrict localchar3restrictp,\n"
+    "                                     local uchar3* restrict localuchar3restrictp,\n"
+    "                                     local short3 * restrict localshort3restrictp,\n"
+    "                                     local ushort3*restrict localushort3restrictp,\n"
+    "                                     local int3 *restrict localint3restrictp,\n"
+    "                                     local uint3* restrict localuint3restrictp,\n"
+    "                                     local long3 * restrict locallong3restrictp,\n"
+    "                                     local ulong3*restrict localulong3restrictp,\n"
+    "                                     local float3 *restrict localfloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector3_p(local const char3* localconstchar3p,\n"
+    "                                  local const uchar3 * localconstuchar3p,\n"
+    "                                  local const short3*localconstshort3p,\n"
+    "                                  local const ushort3 *localconstushort3p,\n"
+    "                                  local const int3* localconstint3p,\n"
+    "                                  local const uint3 * localconstuint3p,\n"
+    "                                  local const long3*localconstlong3p,\n"
+    "                                  local const ulong3 *localconstulong3p,\n"
+    "                                  local const float3* localconstfloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector3_restrict_p(local const char3 * restrict localconstchar3restrictp,\n"
+    "                                           local const uchar3*restrict localconstuchar3restrictp,\n"
+    "                                           local const short3 *restrict localconstshort3restrictp,\n"
+    "                                           local const ushort3* restrict localconstushort3restrictp,\n"
+    "                                           local const int3 * restrict localconstint3restrictp,\n"
+    "                                           local const uint3*restrict localconstuint3restrictp,\n"
+    "                                           local const long3 *restrict localconstlong3restrictp,\n"
+    "                                           local const ulong3* restrict localconstulong3restrictp,\n"
+    "                                           local const float3 * restrict localconstfloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector3_p(local volatile char3*localvolatilechar3p,\n"
+    "                                     local volatile uchar3 *localvolatileuchar3p,\n"
+    "                                     local volatile short3* localvolatileshort3p,\n"
+    "                                     local volatile ushort3 * localvolatileushort3p,\n"
+    "                                     local volatile int3*localvolatileint3p,\n"
+    "                                     local volatile uint3 *localvolatileuint3p,\n"
+    "                                     local volatile long3* localvolatilelong3p,\n"
+    "                                     local volatile ulong3 * localvolatileulong3p,\n"
+    "                                     local volatile float3*localvolatilefloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector3_restrict_p(local volatile char3 *restrict localvolatilechar3restrictp,\n"
+    "                                              local volatile uchar3* restrict localvolatileuchar3restrictp,\n"
+    "                                              local volatile short3 * restrict localvolatileshort3restrictp,\n"
+    "                                              local volatile ushort3*restrict localvolatileushort3restrictp,\n"
+    "                                              local volatile int3 *restrict localvolatileint3restrictp,\n"
+    "                                              local volatile uint3* restrict localvolatileuint3restrictp,\n"
+    "                                              local volatile long3 * restrict localvolatilelong3restrictp,\n"
+    "                                              local volatile ulong3*restrict localvolatileulong3restrictp,\n"
+    "                                              local volatile float3 *restrict localvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector3_p(local const volatile char3* localconstvolatilechar3p,\n"
+    "                                           local const volatile uchar3 * localconstvolatileuchar3p,\n"
+    "                                           local const volatile short3*localconstvolatileshort3p,\n"
+    "                                           local const volatile ushort3 *localconstvolatileushort3p,\n"
+    "                                           local const volatile int3* localconstvolatileint3p,\n"
+    "                                           local const volatile uint3 * localconstvolatileuint3p,\n"
+    "                                           local const volatile long3*localconstvolatilelong3p,\n"
+    "                                           local const volatile ulong3 *localconstvolatileulong3p,\n"
+    "                                           local const volatile float3* localconstvolatilefloat3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector3_restrict_p(local const volatile char3 * restrict localconstvolatilechar3restrictp,\n"
+    "                                                    local const volatile uchar3*restrict localconstvolatileuchar3restrictp,\n"
+    "                                                    local const volatile short3 *restrict localconstvolatileshort3restrictp,\n"
+    "                                                    local const volatile ushort3* restrict localconstvolatileushort3restrictp,\n"
+    "                                                    local const volatile int3 * restrict localconstvolatileint3restrictp,\n"
+    "                                                    local const volatile uint3*restrict localconstvolatileuint3restrictp,\n"
+    "                                                    local const volatile long3 *restrict localconstvolatilelong3restrictp,\n"
+    "                                                    local const volatile ulong3* restrict localconstvolatileulong3restrictp,\n"
+    "                                                    local const volatile float3 * restrict localconstvolatilefloat3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector3_d(char3 char3d,\n"
+    "                      uchar3 uchar3d,\n"
+    "                      short3 short3d,\n"
+    "                      ushort3 ushort3d,\n"
+    "                      int3 int3d,\n"
+    "                      uint3 uint3d,\n"
+    "                      long3 long3d,\n"
+    "                      ulong3 ulong3d,\n"
+    "                      float3 float3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector3_d(const char3 constchar3d,\n"
+    "                            const uchar3 constuchar3d,\n"
+    "                            const short3 constshort3d,\n"
+    "                            const ushort3 constushort3d,\n"
+    "                            const int3 constint3d,\n"
+    "                            const uint3 constuint3d,\n"
+    "                            const long3 constlong3d,\n"
+    "                            const ulong3 constulong3d,\n"
+    "                            const float3 constfloat3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector3_d(private char3 privatechar3d,\n"
+    "                              private uchar3 privateuchar3d,\n"
+    "                              private short3 privateshort3d,\n"
+    "                              private ushort3 privateushort3d,\n"
+    "                              private int3 privateint3d,\n"
+    "                              private uint3 privateuint3d,\n"
+    "                              private long3 privatelong3d,\n"
+    "                              private ulong3 privateulong3d,\n"
+    "                              private float3 privatefloat3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector3_d(private const char3 privateconstchar3d,\n"
+    "                                    private const uchar3 privateconstuchar3d,\n"
+    "                                    private const short3 privateconstshort3d,\n"
+    "                                    private const ushort3 privateconstushort3d,\n"
+    "                                    private const int3 privateconstint3d,\n"
+    "                                    private const uint3 privateconstuint3d,\n"
+    "                                    private const long3 privateconstlong3d,\n"
+    "                                    private const ulong3 privateconstulong3d,\n"
+    "                                    private const float3 privateconstfloat3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_p0(constant char4*constantchar4p,\n"
+    "                               constant uchar4 *constantuchar4p,\n"
+    "                               constant short4* constantshort4p,\n"
+    "                               constant ushort4 * constantushort4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_p1(constant int4*constantint4p,\n"
+    "                               constant uint4 *constantuint4p,\n"
+    "                               constant long4* constantlong4p,\n"
+    "                               constant ulong4 * constantulong4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_p2(constant float4*constantfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_restrict_p0(constant char4 *restrict constantchar4restrictp,\n"
+    "                                        constant uchar4* restrict constantuchar4restrictp,\n"
+    "                                        constant short4 * restrict constantshort4restrictp,\n"
+    "                                        constant ushort4*restrict constantushort4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_restrict_p1(constant int4 *restrict constantint4restrictp,\n"
+    "                                        constant uint4* restrict constantuint4restrictp,\n"
+    "                                        constant long4 * restrict constantlong4restrictp,\n"
+    "                                        constant ulong4*restrict constantulong4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector4_restrict_p2(constant float4 *restrict constantfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector4_p(global char4*globalchar4p,\n"
+    "                             global uchar4 *globaluchar4p,\n"
+    "                             global short4* globalshort4p,\n"
+    "                             global ushort4 * globalushort4p,\n"
+    "                             global int4*globalint4p,\n"
+    "                             global uint4 *globaluint4p,\n"
+    "                             global long4* globallong4p,\n"
+    "                             global ulong4 * globalulong4p,\n"
+    "                             global float4*globalfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector4_restrict_p(global char4 *restrict globalchar4restrictp,\n"
+    "                                      global uchar4* restrict globaluchar4restrictp,\n"
+    "                                      global short4 * restrict globalshort4restrictp,\n"
+    "                                      global ushort4*restrict globalushort4restrictp,\n"
+    "                                      global int4 *restrict globalint4restrictp,\n"
+    "                                      global uint4* restrict globaluint4restrictp,\n"
+    "                                      global long4 * restrict globallong4restrictp,\n"
+    "                                      global ulong4*restrict globalulong4restrictp,\n"
+    "                                      global float4 *restrict globalfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector4_p(global const char4* globalconstchar4p,\n"
+    "                                   global const uchar4 * globalconstuchar4p,\n"
+    "                                   global const short4*globalconstshort4p,\n"
+    "                                   global const ushort4 *globalconstushort4p,\n"
+    "                                   global const int4* globalconstint4p,\n"
+    "                                   global const uint4 * globalconstuint4p,\n"
+    "                                   global const long4*globalconstlong4p,\n"
+    "                                   global const ulong4 *globalconstulong4p,\n"
+    "                                   global const float4* globalconstfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector4_restrict_p(global const char4 * restrict globalconstchar4restrictp,\n"
+    "                                            global const uchar4*restrict globalconstuchar4restrictp,\n"
+    "                                            global const short4 *restrict globalconstshort4restrictp,\n"
+    "                                            global const ushort4* restrict globalconstushort4restrictp,\n"
+    "                                            global const int4 * restrict globalconstint4restrictp,\n"
+    "                                            global const uint4*restrict globalconstuint4restrictp,\n"
+    "                                            global const long4 *restrict globalconstlong4restrictp,\n"
+    "                                            global const ulong4* restrict globalconstulong4restrictp,\n"
+    "                                            global const float4 * restrict globalconstfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector4_p(global volatile char4*globalvolatilechar4p,\n"
+    "                                      global volatile uchar4 *globalvolatileuchar4p,\n"
+    "                                      global volatile short4* globalvolatileshort4p,\n"
+    "                                      global volatile ushort4 * globalvolatileushort4p,\n"
+    "                                      global volatile int4*globalvolatileint4p,\n"
+    "                                      global volatile uint4 *globalvolatileuint4p,\n"
+    "                                      global volatile long4* globalvolatilelong4p,\n"
+    "                                      global volatile ulong4 * globalvolatileulong4p,\n"
+    "                                      global volatile float4*globalvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector4_restrict_p(global volatile char4 *restrict globalvolatilechar4restrictp,\n"
+    "                                               global volatile uchar4* restrict globalvolatileuchar4restrictp,\n"
+    "                                               global volatile short4 * restrict globalvolatileshort4restrictp,\n"
+    "                                               global volatile ushort4*restrict globalvolatileushort4restrictp,\n"
+    "                                               global volatile int4 *restrict globalvolatileint4restrictp,\n"
+    "                                               global volatile uint4* restrict globalvolatileuint4restrictp,\n"
+    "                                               global volatile long4 * restrict globalvolatilelong4restrictp,\n"
+    "                                               global volatile ulong4*restrict globalvolatileulong4restrictp,\n"
+    "                                               global volatile float4 *restrict globalvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector4_p(global const volatile char4* globalconstvolatilechar4p,\n"
+    "                                            global const volatile uchar4 * globalconstvolatileuchar4p,\n"
+    "                                            global const volatile short4*globalconstvolatileshort4p,\n"
+    "                                            global const volatile ushort4 *globalconstvolatileushort4p,\n"
+    "                                            global const volatile int4* globalconstvolatileint4p,\n"
+    "                                            global const volatile uint4 * globalconstvolatileuint4p,\n"
+    "                                            global const volatile long4*globalconstvolatilelong4p,\n"
+    "                                            global const volatile ulong4 *globalconstvolatileulong4p,\n"
+    "                                            global const volatile float4* globalconstvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector4_restrict_p(global const volatile char4 * restrict globalconstvolatilechar4restrictp,\n"
+    "                                                     global const volatile uchar4*restrict globalconstvolatileuchar4restrictp,\n"
+    "                                                     global const volatile short4 *restrict globalconstvolatileshort4restrictp,\n"
+    "                                                     global const volatile ushort4* restrict globalconstvolatileushort4restrictp,\n"
+    "                                                     global const volatile int4 * restrict globalconstvolatileint4restrictp,\n"
+    "                                                     global const volatile uint4*restrict globalconstvolatileuint4restrictp,\n"
+    "                                                     global const volatile long4 *restrict globalconstvolatilelong4restrictp,\n"
+    "                                                     global const volatile ulong4* restrict globalconstvolatileulong4restrictp,\n"
+    "                                                     global const volatile float4 * restrict globalconstvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector4_p(local char4*localchar4p,\n"
+    "                            local uchar4 *localuchar4p,\n"
+    "                            local short4* localshort4p,\n"
+    "                            local ushort4 * localushort4p,\n"
+    "                            local int4*localint4p,\n"
+    "                            local uint4 *localuint4p,\n"
+    "                            local long4* locallong4p,\n"
+    "                            local ulong4 * localulong4p,\n"
+    "                            local float4*localfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector4_restrict_p(local char4 *restrict localchar4restrictp,\n"
+    "                                     local uchar4* restrict localuchar4restrictp,\n"
+    "                                     local short4 * restrict localshort4restrictp,\n"
+    "                                     local ushort4*restrict localushort4restrictp,\n"
+    "                                     local int4 *restrict localint4restrictp,\n"
+    "                                     local uint4* restrict localuint4restrictp,\n"
+    "                                     local long4 * restrict locallong4restrictp,\n"
+    "                                     local ulong4*restrict localulong4restrictp,\n"
+    "                                     local float4 *restrict localfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector4_p(local const char4* localconstchar4p,\n"
+    "                                  local const uchar4 * localconstuchar4p,\n"
+    "                                  local const short4*localconstshort4p,\n"
+    "                                  local const ushort4 *localconstushort4p,\n"
+    "                                  local const int4* localconstint4p,\n"
+    "                                  local const uint4 * localconstuint4p,\n"
+    "                                  local const long4*localconstlong4p,\n"
+    "                                  local const ulong4 *localconstulong4p,\n"
+    "                                  local const float4* localconstfloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector4_restrict_p(local const char4 * restrict localconstchar4restrictp,\n"
+    "                                           local const uchar4*restrict localconstuchar4restrictp,\n"
+    "                                           local const short4 *restrict localconstshort4restrictp,\n"
+    "                                           local const ushort4* restrict localconstushort4restrictp,\n"
+    "                                           local const int4 * restrict localconstint4restrictp,\n"
+    "                                           local const uint4*restrict localconstuint4restrictp,\n"
+    "                                           local const long4 *restrict localconstlong4restrictp,\n"
+    "                                           local const ulong4* restrict localconstulong4restrictp,\n"
+    "                                           local const float4 * restrict localconstfloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector4_p(local volatile char4*localvolatilechar4p,\n"
+    "                                     local volatile uchar4 *localvolatileuchar4p,\n"
+    "                                     local volatile short4* localvolatileshort4p,\n"
+    "                                     local volatile ushort4 * localvolatileushort4p,\n"
+    "                                     local volatile int4*localvolatileint4p,\n"
+    "                                     local volatile uint4 *localvolatileuint4p,\n"
+    "                                     local volatile long4* localvolatilelong4p,\n"
+    "                                     local volatile ulong4 * localvolatileulong4p,\n"
+    "                                     local volatile float4*localvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector4_restrict_p(local volatile char4 *restrict localvolatilechar4restrictp,\n"
+    "                                              local volatile uchar4* restrict localvolatileuchar4restrictp,\n"
+    "                                              local volatile short4 * restrict localvolatileshort4restrictp,\n"
+    "                                              local volatile ushort4*restrict localvolatileushort4restrictp,\n"
+    "                                              local volatile int4 *restrict localvolatileint4restrictp,\n"
+    "                                              local volatile uint4* restrict localvolatileuint4restrictp,\n"
+    "                                              local volatile long4 * restrict localvolatilelong4restrictp,\n"
+    "                                              local volatile ulong4*restrict localvolatileulong4restrictp,\n"
+    "                                              local volatile float4 *restrict localvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector4_p(local const volatile char4* localconstvolatilechar4p,\n"
+    "                                           local const volatile uchar4 * localconstvolatileuchar4p,\n"
+    "                                           local const volatile short4*localconstvolatileshort4p,\n"
+    "                                           local const volatile ushort4 *localconstvolatileushort4p,\n"
+    "                                           local const volatile int4* localconstvolatileint4p,\n"
+    "                                           local const volatile uint4 * localconstvolatileuint4p,\n"
+    "                                           local const volatile long4*localconstvolatilelong4p,\n"
+    "                                           local const volatile ulong4 *localconstvolatileulong4p,\n"
+    "                                           local const volatile float4* localconstvolatilefloat4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector4_restrict_p(local const volatile char4 * restrict localconstvolatilechar4restrictp,\n"
+    "                                                    local const volatile uchar4*restrict localconstvolatileuchar4restrictp,\n"
+    "                                                    local const volatile short4 *restrict localconstvolatileshort4restrictp,\n"
+    "                                                    local const volatile ushort4* restrict localconstvolatileushort4restrictp,\n"
+    "                                                    local const volatile int4 * restrict localconstvolatileint4restrictp,\n"
+    "                                                    local const volatile uint4*restrict localconstvolatileuint4restrictp,\n"
+    "                                                    local const volatile long4 *restrict localconstvolatilelong4restrictp,\n"
+    "                                                    local const volatile ulong4* restrict localconstvolatileulong4restrictp,\n"
+    "                                                    local const volatile float4 * restrict localconstvolatilefloat4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector4_d(char4 char4d,\n"
+    "                      uchar4 uchar4d,\n"
+    "                      short4 short4d,\n"
+    "                      ushort4 ushort4d,\n"
+    "                      int4 int4d,\n"
+    "                      uint4 uint4d,\n"
+    "                      long4 long4d,\n"
+    "                      ulong4 ulong4d,\n"
+    "                      float4 float4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector4_d(const char4 constchar4d,\n"
+    "                            const uchar4 constuchar4d,\n"
+    "                            const short4 constshort4d,\n"
+    "                            const ushort4 constushort4d,\n"
+    "                            const int4 constint4d,\n"
+    "                            const uint4 constuint4d,\n"
+    "                            const long4 constlong4d,\n"
+    "                            const ulong4 constulong4d,\n"
+    "                            const float4 constfloat4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector4_d(private char4 privatechar4d,\n"
+    "                              private uchar4 privateuchar4d,\n"
+    "                              private short4 privateshort4d,\n"
+    "                              private ushort4 privateushort4d,\n"
+    "                              private int4 privateint4d,\n"
+    "                              private uint4 privateuint4d,\n"
+    "                              private long4 privatelong4d,\n"
+    "                              private ulong4 privateulong4d,\n"
+    "                              private float4 privatefloat4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector4_d(private const char4 privateconstchar4d,\n"
+    "                                    private const uchar4 privateconstuchar4d,\n"
+    "                                    private const short4 privateconstshort4d,\n"
+    "                                    private const ushort4 privateconstushort4d,\n"
+    "                                    private const int4 privateconstint4d,\n"
+    "                                    private const uint4 privateconstuint4d,\n"
+    "                                    private const long4 privateconstlong4d,\n"
+    "                                    private const ulong4 privateconstulong4d,\n"
+    "                                    private const float4 privateconstfloat4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_p0(constant char8*constantchar8p,\n"
+    "                               constant uchar8 *constantuchar8p,\n"
+    "                               constant short8* constantshort8p,\n"
+    "                               constant ushort8 * constantushort8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_p1(constant int8*constantint8p,\n"
+    "                               constant uint8 *constantuint8p,\n"
+    "                               constant long8* constantlong8p,\n"
+    "                               constant ulong8 * constantulong8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_p2(constant float8*constantfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_restrict_p0(constant char8 *restrict constantchar8restrictp,\n"
+    "                                        constant uchar8* restrict constantuchar8restrictp,\n"
+    "                                        constant short8 * restrict constantshort8restrictp,\n"
+    "                                        constant ushort8*restrict constantushort8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_restrict_p1(constant int8 *restrict constantint8restrictp,\n"
+    "                                        constant uint8* restrict constantuint8restrictp,\n"
+    "                                        constant long8 * restrict constantlong8restrictp,\n"
+    "                                        constant ulong8*restrict constantulong8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector8_restrict_p2(constant float8 *restrict constantfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector8_p(global char8*globalchar8p,\n"
+    "                             global uchar8 *globaluchar8p,\n"
+    "                             global short8* globalshort8p,\n"
+    "                             global ushort8 * globalushort8p,\n"
+    "                             global int8*globalint8p,\n"
+    "                             global uint8 *globaluint8p,\n"
+    "                             global long8* globallong8p,\n"
+    "                             global ulong8 * globalulong8p,\n"
+    "                             global float8*globalfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector8_restrict_p(global char8 *restrict globalchar8restrictp,\n"
+    "                                      global uchar8* restrict globaluchar8restrictp,\n"
+    "                                      global short8 * restrict globalshort8restrictp,\n"
+    "                                      global ushort8*restrict globalushort8restrictp,\n"
+    "                                      global int8 *restrict globalint8restrictp,\n"
+    "                                      global uint8* restrict globaluint8restrictp,\n"
+    "                                      global long8 * restrict globallong8restrictp,\n"
+    "                                      global ulong8*restrict globalulong8restrictp,\n"
+    "                                      global float8 *restrict globalfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector8_p(global const char8* globalconstchar8p,\n"
+    "                                   global const uchar8 * globalconstuchar8p,\n"
+    "                                   global const short8*globalconstshort8p,\n"
+    "                                   global const ushort8 *globalconstushort8p,\n"
+    "                                   global const int8* globalconstint8p,\n"
+    "                                   global const uint8 * globalconstuint8p,\n"
+    "                                   global const long8*globalconstlong8p,\n"
+    "                                   global const ulong8 *globalconstulong8p,\n"
+    "                                   global const float8* globalconstfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector8_restrict_p(global const char8 * restrict globalconstchar8restrictp,\n"
+    "                                            global const uchar8*restrict globalconstuchar8restrictp,\n"
+    "                                            global const short8 *restrict globalconstshort8restrictp,\n"
+    "                                            global const ushort8* restrict globalconstushort8restrictp,\n"
+    "                                            global const int8 * restrict globalconstint8restrictp,\n"
+    "                                            global const uint8*restrict globalconstuint8restrictp,\n"
+    "                                            global const long8 *restrict globalconstlong8restrictp,\n"
+    "                                            global const ulong8* restrict globalconstulong8restrictp,\n"
+    "                                            global const float8 * restrict globalconstfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector8_p(global volatile char8*globalvolatilechar8p,\n"
+    "                                      global volatile uchar8 *globalvolatileuchar8p,\n"
+    "                                      global volatile short8* globalvolatileshort8p,\n"
+    "                                      global volatile ushort8 * globalvolatileushort8p,\n"
+    "                                      global volatile int8*globalvolatileint8p,\n"
+    "                                      global volatile uint8 *globalvolatileuint8p,\n"
+    "                                      global volatile long8* globalvolatilelong8p,\n"
+    "                                      global volatile ulong8 * globalvolatileulong8p,\n"
+    "                                      global volatile float8*globalvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector8_restrict_p(global volatile char8 *restrict globalvolatilechar8restrictp,\n"
+    "                                               global volatile uchar8* restrict globalvolatileuchar8restrictp,\n"
+    "                                               global volatile short8 * restrict globalvolatileshort8restrictp,\n"
+    "                                               global volatile ushort8*restrict globalvolatileushort8restrictp,\n"
+    "                                               global volatile int8 *restrict globalvolatileint8restrictp,\n"
+    "                                               global volatile uint8* restrict globalvolatileuint8restrictp,\n"
+    "                                               global volatile long8 * restrict globalvolatilelong8restrictp,\n"
+    "                                               global volatile ulong8*restrict globalvolatileulong8restrictp,\n"
+    "                                               global volatile float8 *restrict globalvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector8_p(global const volatile char8* globalconstvolatilechar8p,\n"
+    "                                            global const volatile uchar8 * globalconstvolatileuchar8p,\n"
+    "                                            global const volatile short8*globalconstvolatileshort8p,\n"
+    "                                            global const volatile ushort8 *globalconstvolatileushort8p,\n"
+    "                                            global const volatile int8* globalconstvolatileint8p,\n"
+    "                                            global const volatile uint8 * globalconstvolatileuint8p,\n"
+    "                                            global const volatile long8*globalconstvolatilelong8p,\n"
+    "                                            global const volatile ulong8 *globalconstvolatileulong8p,\n"
+    "                                            global const volatile float8* globalconstvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector8_restrict_p(global const volatile char8 * restrict globalconstvolatilechar8restrictp,\n"
+    "                                                     global const volatile uchar8*restrict globalconstvolatileuchar8restrictp,\n"
+    "                                                     global const volatile short8 *restrict globalconstvolatileshort8restrictp,\n"
+    "                                                     global const volatile ushort8* restrict globalconstvolatileushort8restrictp,\n"
+    "                                                     global const volatile int8 * restrict globalconstvolatileint8restrictp,\n"
+    "                                                     global const volatile uint8*restrict globalconstvolatileuint8restrictp,\n"
+    "                                                     global const volatile long8 *restrict globalconstvolatilelong8restrictp,\n"
+    "                                                     global const volatile ulong8* restrict globalconstvolatileulong8restrictp,\n"
+    "                                                     global const volatile float8 * restrict globalconstvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector8_p(local char8*localchar8p,\n"
+    "                            local uchar8 *localuchar8p,\n"
+    "                            local short8* localshort8p,\n"
+    "                            local ushort8 * localushort8p,\n"
+    "                            local int8*localint8p,\n"
+    "                            local uint8 *localuint8p,\n"
+    "                            local long8* locallong8p,\n"
+    "                            local ulong8 * localulong8p,\n"
+    "                            local float8*localfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector8_restrict_p(local char8 *restrict localchar8restrictp,\n"
+    "                                     local uchar8* restrict localuchar8restrictp,\n"
+    "                                     local short8 * restrict localshort8restrictp,\n"
+    "                                     local ushort8*restrict localushort8restrictp,\n"
+    "                                     local int8 *restrict localint8restrictp,\n"
+    "                                     local uint8* restrict localuint8restrictp,\n"
+    "                                     local long8 * restrict locallong8restrictp,\n"
+    "                                     local ulong8*restrict localulong8restrictp,\n"
+    "                                     local float8 *restrict localfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector8_p(local const char8* localconstchar8p,\n"
+    "                                  local const uchar8 * localconstuchar8p,\n"
+    "                                  local const short8*localconstshort8p,\n"
+    "                                  local const ushort8 *localconstushort8p,\n"
+    "                                  local const int8* localconstint8p,\n"
+    "                                  local const uint8 * localconstuint8p,\n"
+    "                                  local const long8*localconstlong8p,\n"
+    "                                  local const ulong8 *localconstulong8p,\n"
+    "                                  local const float8* localconstfloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector8_restrict_p(local const char8 * restrict localconstchar8restrictp,\n"
+    "                                           local const uchar8*restrict localconstuchar8restrictp,\n"
+    "                                           local const short8 *restrict localconstshort8restrictp,\n"
+    "                                           local const ushort8* restrict localconstushort8restrictp,\n"
+    "                                           local const int8 * restrict localconstint8restrictp,\n"
+    "                                           local const uint8*restrict localconstuint8restrictp,\n"
+    "                                           local const long8 *restrict localconstlong8restrictp,\n"
+    "                                           local const ulong8* restrict localconstulong8restrictp,\n"
+    "                                           local const float8 * restrict localconstfloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector8_p(local volatile char8*localvolatilechar8p,\n"
+    "                                     local volatile uchar8 *localvolatileuchar8p,\n"
+    "                                     local volatile short8* localvolatileshort8p,\n"
+    "                                     local volatile ushort8 * localvolatileushort8p,\n"
+    "                                     local volatile int8*localvolatileint8p,\n"
+    "                                     local volatile uint8 *localvolatileuint8p,\n"
+    "                                     local volatile long8* localvolatilelong8p,\n"
+    "                                     local volatile ulong8 * localvolatileulong8p,\n"
+    "                                     local volatile float8*localvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector8_restrict_p(local volatile char8 *restrict localvolatilechar8restrictp,\n"
+    "                                              local volatile uchar8* restrict localvolatileuchar8restrictp,\n"
+    "                                              local volatile short8 * restrict localvolatileshort8restrictp,\n"
+    "                                              local volatile ushort8*restrict localvolatileushort8restrictp,\n"
+    "                                              local volatile int8 *restrict localvolatileint8restrictp,\n"
+    "                                              local volatile uint8* restrict localvolatileuint8restrictp,\n"
+    "                                              local volatile long8 * restrict localvolatilelong8restrictp,\n"
+    "                                              local volatile ulong8*restrict localvolatileulong8restrictp,\n"
+    "                                              local volatile float8 *restrict localvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector8_p(local const volatile char8* localconstvolatilechar8p,\n"
+    "                                           local const volatile uchar8 * localconstvolatileuchar8p,\n"
+    "                                           local const volatile short8*localconstvolatileshort8p,\n"
+    "                                           local const volatile ushort8 *localconstvolatileushort8p,\n"
+    "                                           local const volatile int8* localconstvolatileint8p,\n"
+    "                                           local const volatile uint8 * localconstvolatileuint8p,\n"
+    "                                           local const volatile long8*localconstvolatilelong8p,\n"
+    "                                           local const volatile ulong8 *localconstvolatileulong8p,\n"
+    "                                           local const volatile float8* localconstvolatilefloat8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector8_restrict_p(local const volatile char8 * restrict localconstvolatilechar8restrictp,\n"
+    "                                                    local const volatile uchar8*restrict localconstvolatileuchar8restrictp,\n"
+    "                                                    local const volatile short8 *restrict localconstvolatileshort8restrictp,\n"
+    "                                                    local const volatile ushort8* restrict localconstvolatileushort8restrictp,\n"
+    "                                                    local const volatile int8 * restrict localconstvolatileint8restrictp,\n"
+    "                                                    local const volatile uint8*restrict localconstvolatileuint8restrictp,\n"
+    "                                                    local const volatile long8 *restrict localconstvolatilelong8restrictp,\n"
+    "                                                    local const volatile ulong8* restrict localconstvolatileulong8restrictp,\n"
+    "                                                    local const volatile float8 * restrict localconstvolatilefloat8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector8_d(char8 char8d,\n"
+    "                      uchar8 uchar8d,\n"
+    "                      short8 short8d,\n"
+    "                      ushort8 ushort8d,\n"
+    "                      int8 int8d,\n"
+    "                      uint8 uint8d,\n"
+    "                      long8 long8d,\n"
+    "                      ulong8 ulong8d,\n"
+    "                      float8 float8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector8_d(const char8 constchar8d,\n"
+    "                            const uchar8 constuchar8d,\n"
+    "                            const short8 constshort8d,\n"
+    "                            const ushort8 constushort8d,\n"
+    "                            const int8 constint8d,\n"
+    "                            const uint8 constuint8d,\n"
+    "                            const long8 constlong8d,\n"
+    "                            const ulong8 constulong8d,\n"
+    "                            const float8 constfloat8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector8_d(private char8 privatechar8d,\n"
+    "                              private uchar8 privateuchar8d,\n"
+    "                              private short8 privateshort8d,\n"
+    "                              private ushort8 privateushort8d,\n"
+    "                              private int8 privateint8d,\n"
+    "                              private uint8 privateuint8d,\n"
+    "                              private long8 privatelong8d,\n"
+    "                              private ulong8 privateulong8d,\n"
+    "                              private float8 privatefloat8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector8_d(private const char8 privateconstchar8d,\n"
+    "                                    private const uchar8 privateconstuchar8d,\n"
+    "                                    private const short8 privateconstshort8d,\n"
+    "                                    private const ushort8 privateconstushort8d,\n"
+    "                                    private const int8 privateconstint8d,\n"
+    "                                    private const uint8 privateconstuint8d,\n"
+    "                                    private const long8 privateconstlong8d,\n"
+    "                                    private const ulong8 privateconstulong8d,\n"
+    "                                    private const float8 privateconstfloat8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_p0(constant char16*constantchar16p,\n"
+    "                                constant uchar16 *constantuchar16p,\n"
+    "                                constant short16* constantshort16p,\n"
+    "                                constant ushort16 * constantushort16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_p1(constant int16*constantint16p,\n"
+    "                                constant uint16 *constantuint16p,\n"
+    "                                constant long16* constantlong16p,\n"
+    "                                constant ulong16 * constantulong16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_p2(constant float16*constantfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_restrict_p0(constant char16 *restrict constantchar16restrictp,\n"
+    "                                         constant uchar16* restrict constantuchar16restrictp,\n"
+    "                                         constant short16 * restrict constantshort16restrictp,\n"
+    "                                         constant ushort16*restrict constantushort16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_restrict_p1(constant int16 *restrict constantint16restrictp,\n"
+    "                                         constant uint16* restrict constantuint16restrictp,\n"
+    "                                         constant long16 * restrict constantlong16restrictp,\n"
+    "                                         constant ulong16*restrict constantulong16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_vector16_restrict_p2(constant float16 *restrict constantfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector16_p(global char16*globalchar16p,\n"
+    "                              global uchar16 *globaluchar16p,\n"
+    "                              global short16* globalshort16p,\n"
+    "                              global ushort16 * globalushort16p,\n"
+    "                              global int16*globalint16p,\n"
+    "                              global uint16 *globaluint16p,\n"
+    "                              global long16* globallong16p,\n"
+    "                              global ulong16 * globalulong16p,\n"
+    "                              global float16*globalfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_vector16_restrict_p(global char16 *restrict globalchar16restrictp,\n"
+    "                                       global uchar16* restrict globaluchar16restrictp,\n"
+    "                                       global short16 * restrict globalshort16restrictp,\n"
+    "                                       global ushort16*restrict globalushort16restrictp,\n"
+    "                                       global int16 *restrict globalint16restrictp,\n"
+    "                                       global uint16* restrict globaluint16restrictp,\n"
+    "                                       global long16 * restrict globallong16restrictp,\n"
+    "                                       global ulong16*restrict globalulong16restrictp,\n"
+    "                                       global float16 *restrict globalfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector16_p(global const char16* globalconstchar16p,\n"
+    "                                    global const uchar16 * globalconstuchar16p,\n"
+    "                                    global const short16*globalconstshort16p,\n"
+    "                                    global const ushort16 *globalconstushort16p,\n"
+    "                                    global const int16* globalconstint16p,\n"
+    "                                    global const uint16 * globalconstuint16p,\n"
+    "                                    global const long16*globalconstlong16p,\n"
+    "                                    global const ulong16 *globalconstulong16p,\n"
+    "                                    global const float16* globalconstfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_vector16_restrict_p(global const char16 * restrict globalconstchar16restrictp,\n"
+    "                                             global const uchar16*restrict globalconstuchar16restrictp,\n"
+    "                                             global const short16 *restrict globalconstshort16restrictp,\n"
+    "                                             global const ushort16* restrict globalconstushort16restrictp,\n"
+    "                                             global const int16 * restrict globalconstint16restrictp,\n"
+    "                                             global const uint16*restrict globalconstuint16restrictp,\n"
+    "                                             global const long16 *restrict globalconstlong16restrictp,\n"
+    "                                             global const ulong16* restrict globalconstulong16restrictp,\n"
+    "                                             global const float16 * restrict globalconstfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector16_p(global volatile char16*globalvolatilechar16p,\n"
+    "                                       global volatile uchar16 *globalvolatileuchar16p,\n"
+    "                                       global volatile short16* globalvolatileshort16p,\n"
+    "                                       global volatile ushort16 * globalvolatileushort16p,\n"
+    "                                       global volatile int16*globalvolatileint16p,\n"
+    "                                       global volatile uint16 *globalvolatileuint16p,\n"
+    "                                       global volatile long16* globalvolatilelong16p,\n"
+    "                                       global volatile ulong16 * globalvolatileulong16p,\n"
+    "                                       global volatile float16*globalvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_vector16_restrict_p(global volatile char16 *restrict globalvolatilechar16restrictp,\n"
+    "                                                global volatile uchar16* restrict globalvolatileuchar16restrictp,\n"
+    "                                                global volatile short16 * restrict globalvolatileshort16restrictp,\n"
+    "                                                global volatile ushort16*restrict globalvolatileushort16restrictp,\n"
+    "                                                global volatile int16 *restrict globalvolatileint16restrictp,\n"
+    "                                                global volatile uint16* restrict globalvolatileuint16restrictp,\n"
+    "                                                global volatile long16 * restrict globalvolatilelong16restrictp,\n"
+    "                                                global volatile ulong16*restrict globalvolatileulong16restrictp,\n"
+    "                                                global volatile float16 *restrict globalvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector16_p(global const volatile char16* globalconstvolatilechar16p,\n"
+    "                                             global const volatile uchar16 * globalconstvolatileuchar16p,\n"
+    "                                             global const volatile short16*globalconstvolatileshort16p,\n"
+    "                                             global const volatile ushort16 *globalconstvolatileushort16p,\n"
+    "                                             global const volatile int16* globalconstvolatileint16p,\n"
+    "                                             global const volatile uint16 * globalconstvolatileuint16p,\n"
+    "                                             global const volatile long16*globalconstvolatilelong16p,\n"
+    "                                             global const volatile ulong16 *globalconstvolatileulong16p,\n"
+    "                                             global const volatile float16* globalconstvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_vector16_restrict_p(global const volatile char16 * restrict globalconstvolatilechar16restrictp,\n"
+    "                                                      global const volatile uchar16*restrict globalconstvolatileuchar16restrictp,\n"
+    "                                                      global const volatile short16 *restrict globalconstvolatileshort16restrictp,\n"
+    "                                                      global const volatile ushort16* restrict globalconstvolatileushort16restrictp,\n"
+    "                                                      global const volatile int16 * restrict globalconstvolatileint16restrictp,\n"
+    "                                                      global const volatile uint16*restrict globalconstvolatileuint16restrictp,\n"
+    "                                                      global const volatile long16 *restrict globalconstvolatilelong16restrictp,\n"
+    "                                                      global const volatile ulong16* restrict globalconstvolatileulong16restrictp,\n"
+    "                                                      global const volatile float16 * restrict globalconstvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector16_p(local char16*localchar16p,\n"
+    "                             local uchar16 *localuchar16p,\n"
+    "                             local short16* localshort16p,\n"
+    "                             local ushort16 * localushort16p,\n"
+    "                             local int16*localint16p,\n"
+    "                             local uint16 *localuint16p,\n"
+    "                             local long16* locallong16p,\n"
+    "                             local ulong16 * localulong16p,\n"
+    "                             local float16*localfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_vector16_restrict_p(local char16 *restrict localchar16restrictp,\n"
+    "                                      local uchar16* restrict localuchar16restrictp,\n"
+    "                                      local short16 * restrict localshort16restrictp,\n"
+    "                                      local ushort16*restrict localushort16restrictp,\n"
+    "                                      local int16 *restrict localint16restrictp,\n"
+    "                                      local uint16* restrict localuint16restrictp,\n"
+    "                                      local long16 * restrict locallong16restrictp,\n"
+    "                                      local ulong16*restrict localulong16restrictp,\n"
+    "                                      local float16 *restrict localfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector16_p(local const char16* localconstchar16p,\n"
+    "                                   local const uchar16 * localconstuchar16p,\n"
+    "                                   local const short16*localconstshort16p,\n"
+    "                                   local const ushort16 *localconstushort16p,\n"
+    "                                   local const int16* localconstint16p,\n"
+    "                                   local const uint16 * localconstuint16p,\n"
+    "                                   local const long16*localconstlong16p,\n"
+    "                                   local const ulong16 *localconstulong16p,\n"
+    "                                   local const float16* localconstfloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_vector16_restrict_p(local const char16 * restrict localconstchar16restrictp,\n"
+    "                                            local const uchar16*restrict localconstuchar16restrictp,\n"
+    "                                            local const short16 *restrict localconstshort16restrictp,\n"
+    "                                            local const ushort16* restrict localconstushort16restrictp,\n"
+    "                                            local const int16 * restrict localconstint16restrictp,\n"
+    "                                            local const uint16*restrict localconstuint16restrictp,\n"
+    "                                            local const long16 *restrict localconstlong16restrictp,\n"
+    "                                            local const ulong16* restrict localconstulong16restrictp,\n"
+    "                                            local const float16 * restrict localconstfloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector16_p(local volatile char16*localvolatilechar16p,\n"
+    "                                      local volatile uchar16 *localvolatileuchar16p,\n"
+    "                                      local volatile short16* localvolatileshort16p,\n"
+    "                                      local volatile ushort16 * localvolatileushort16p,\n"
+    "                                      local volatile int16*localvolatileint16p,\n"
+    "                                      local volatile uint16 *localvolatileuint16p,\n"
+    "                                      local volatile long16* localvolatilelong16p,\n"
+    "                                      local volatile ulong16 * localvolatileulong16p,\n"
+    "                                      local volatile float16*localvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_vector16_restrict_p(local volatile char16 *restrict localvolatilechar16restrictp,\n"
+    "                                               local volatile uchar16* restrict localvolatileuchar16restrictp,\n"
+    "                                               local volatile short16 * restrict localvolatileshort16restrictp,\n"
+    "                                               local volatile ushort16*restrict localvolatileushort16restrictp,\n"
+    "                                               local volatile int16 *restrict localvolatileint16restrictp,\n"
+    "                                               local volatile uint16* restrict localvolatileuint16restrictp,\n"
+    "                                               local volatile long16 * restrict localvolatilelong16restrictp,\n"
+    "                                               local volatile ulong16*restrict localvolatileulong16restrictp,\n"
+    "                                               local volatile float16 *restrict localvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector16_p(local const volatile char16* localconstvolatilechar16p,\n"
+    "                                            local const volatile uchar16 * localconstvolatileuchar16p,\n"
+    "                                            local const volatile short16*localconstvolatileshort16p,\n"
+    "                                            local const volatile ushort16 *localconstvolatileushort16p,\n"
+    "                                            local const volatile int16* localconstvolatileint16p,\n"
+    "                                            local const volatile uint16 * localconstvolatileuint16p,\n"
+    "                                            local const volatile long16*localconstvolatilelong16p,\n"
+    "                                            local const volatile ulong16 *localconstvolatileulong16p,\n"
+    "                                            local const volatile float16* localconstvolatilefloat16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_vector16_restrict_p(local const volatile char16 * restrict localconstvolatilechar16restrictp,\n"
+    "                                                     local const volatile uchar16*restrict localconstvolatileuchar16restrictp,\n"
+    "                                                     local const volatile short16 *restrict localconstvolatileshort16restrictp,\n"
+    "                                                     local const volatile ushort16* restrict localconstvolatileushort16restrictp,\n"
+    "                                                     local const volatile int16 * restrict localconstvolatileint16restrictp,\n"
+    "                                                     local const volatile uint16*restrict localconstvolatileuint16restrictp,\n"
+    "                                                     local const volatile long16 *restrict localconstvolatilelong16restrictp,\n"
+    "                                                     local const volatile ulong16* restrict localconstvolatileulong16restrictp,\n"
+    "                                                     local const volatile float16 * restrict localconstvolatilefloat16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void vector16_d(char16 char16d,\n"
+    "                       uchar16 uchar16d,\n"
+    "                       short16 short16d,\n"
+    "                       ushort16 ushort16d,\n"
+    "                       int16 int16d,\n"
+    "                       uint16 uint16d,\n"
+    "                       long16 long16d,\n"
+    "                       ulong16 ulong16d,\n"
+    "                       float16 float16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_vector16_d(const char16 constchar16d,\n"
+    "                             const uchar16 constuchar16d,\n"
+    "                             const short16 constshort16d,\n"
+    "                             const ushort16 constushort16d,\n"
+    "                             const int16 constint16d,\n"
+    "                             const uint16 constuint16d,\n"
+    "                             const long16 constlong16d,\n"
+    "                             const ulong16 constulong16d,\n"
+    "                             const float16 constfloat16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_vector16_d(private char16 privatechar16d,\n"
+    "                               private uchar16 privateuchar16d,\n"
+    "                               private short16 privateshort16d,\n"
+    "                               private ushort16 privateushort16d,\n"
+    "                               private int16 privateint16d,\n"
+    "                               private uint16 privateuint16d,\n"
+    "                               private long16 privatelong16d,\n"
+    "                               private ulong16 privateulong16d,\n"
+    "                               private float16 privatefloat16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_vector16_d(private const char16 privateconstchar16d,\n"
+    "                                     private const uchar16 privateconstuchar16d,\n"
+    "                                     private const short16 privateconstshort16d,\n"
+    "                                     private const ushort16 privateconstushort16d,\n"
+    "                                     private const int16 privateconstint16d,\n"
+    "                                     private const uint16 privateconstuint16d,\n"
+    "                                     private const long16 privateconstlong16d,\n"
+    "                                     private const ulong16 privateconstulong16d,\n"
+    "                                     private const float16 privateconstfloat16d)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_p0(constant typedef_type*constanttypedef_typep,\n"
+    "                               constant struct struct_type *constantstructstruct_typep,\n"
+    "                               constant typedef_struct_type* constanttypedef_struct_typep,\n"
+    "                               constant union union_type * constantunionunion_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_p1(constant typedef_union_type*constanttypedef_union_typep,\n"
+    "                               constant enum enum_type *constantenumenum_typep,\n"
+    "                               constant typedef_enum_type* constanttypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_restrict_p0(constant typedef_type * restrict constanttypedef_typerestrictp,\n"
+    "                                        constant struct struct_type*restrict constantstructstruct_typerestrictp,\n"
+    "                                        constant typedef_struct_type *restrict constanttypedef_struct_typerestrictp,\n"
+    "                                        constant union union_type* restrict constantunionunion_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void constant_derived_restrict_p1(constant typedef_union_type * restrict constanttypedef_union_typerestrictp,\n"
+    "                                        constant enum enum_type*restrict constantenumenum_typerestrictp,\n"
+    "                                        constant typedef_enum_type *restrict constanttypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_derived_p(global typedef_type*globaltypedef_typep,\n"
+    "                             global struct struct_type *globalstructstruct_typep,\n"
+    "                             global typedef_struct_type* globaltypedef_struct_typep,\n"
+    "                             global union union_type * globalunionunion_typep,\n"
+    "                             global typedef_union_type*globaltypedef_union_typep,\n"
+    "                             global enum enum_type *globalenumenum_typep,\n"
+    "                             global typedef_enum_type* globaltypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_derived_restrict_p(global typedef_type * restrict globaltypedef_typerestrictp,\n"
+    "                                      global struct struct_type*restrict globalstructstruct_typerestrictp,\n"
+    "                                      global typedef_struct_type *restrict globaltypedef_struct_typerestrictp,\n"
+    "                                      global union union_type* restrict globalunionunion_typerestrictp,\n"
+    "                                      global typedef_union_type * restrict globaltypedef_union_typerestrictp,\n"
+    "                                      global enum enum_type*restrict globalenumenum_typerestrictp,\n"
+    "                                      global typedef_enum_type *restrict globaltypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_derived_p(global const typedef_type* globalconsttypedef_typep,\n"
+    "                                   global const struct struct_type * globalconststructstruct_typep,\n"
+    "                                   global const typedef_struct_type*globalconsttypedef_struct_typep,\n"
+    "                                   global const union union_type *globalconstunionunion_typep,\n"
+    "                                   global const typedef_union_type* globalconsttypedef_union_typep,\n"
+    "                                   global const enum enum_type * globalconstenumenum_typep,\n"
+    "                                   global const typedef_enum_type*globalconsttypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_derived_restrict_p(global const typedef_type *restrict globalconsttypedef_typerestrictp,\n"
+    "                                            global const struct struct_type* restrict globalconststructstruct_typerestrictp,\n"
+    "                                            global const typedef_struct_type * restrict globalconsttypedef_struct_typerestrictp,\n"
+    "                                            global const union union_type*restrict globalconstunionunion_typerestrictp,\n"
+    "                                            global const typedef_union_type *restrict globalconsttypedef_union_typerestrictp,\n"
+    "                                            global const enum enum_type* restrict globalconstenumenum_typerestrictp,\n"
+    "                                            global const typedef_enum_type * restrict globalconsttypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_derived_p(global volatile typedef_type*globalvolatiletypedef_typep,\n"
+    "                                      global volatile struct struct_type *globalvolatilestructstruct_typep,\n"
+    "                                      global volatile typedef_struct_type* globalvolatiletypedef_struct_typep,\n"
+    "                                      global volatile union union_type * globalvolatileunionunion_typep,\n"
+    "                                      global volatile typedef_union_type*globalvolatiletypedef_union_typep,\n"
+    "                                      global volatile enum enum_type *globalvolatileenumenum_typep,\n"
+    "                                      global volatile typedef_enum_type* globalvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_volatile_derived_restrict_p(global volatile typedef_type * restrict globalvolatiletypedef_typerestrictp,\n"
+    "                                               global volatile struct struct_type*restrict globalvolatilestructstruct_typerestrictp,\n"
+    "                                               global volatile typedef_struct_type *restrict globalvolatiletypedef_struct_typerestrictp,\n"
+    "                                               global volatile union union_type* restrict globalvolatileunionunion_typerestrictp,\n"
+    "                                               global volatile typedef_union_type * restrict globalvolatiletypedef_union_typerestrictp,\n"
+    "                                               global volatile enum enum_type*restrict globalvolatileenumenum_typerestrictp,\n"
+    "                                               global volatile typedef_enum_type *restrict globalvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_derived_p(global const volatile typedef_type* globalconstvolatiletypedef_typep,\n"
+    "                                            global const volatile struct struct_type * globalconstvolatilestructstruct_typep,\n"
+    "                                            global const volatile typedef_struct_type*globalconstvolatiletypedef_struct_typep,\n"
+    "                                            global const volatile union union_type *globalconstvolatileunionunion_typep,\n"
+    "                                            global const volatile typedef_union_type* globalconstvolatiletypedef_union_typep,\n"
+    "                                            global const volatile enum enum_type * globalconstvolatileenumenum_typep,\n"
+    "                                            global const volatile typedef_enum_type*globalconstvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void global_const_volatile_derived_restrict_p(global const volatile typedef_type *restrict globalconstvolatiletypedef_typerestrictp,\n"
+    "                                                     global const volatile struct struct_type* restrict globalconstvolatilestructstruct_typerestrictp,\n"
+    "                                                     global const volatile typedef_struct_type * restrict globalconstvolatiletypedef_struct_typerestrictp,\n"
+    "                                                     global const volatile union union_type*restrict globalconstvolatileunionunion_typerestrictp,\n"
+    "                                                     global const volatile typedef_union_type *restrict globalconstvolatiletypedef_union_typerestrictp,\n"
+    "                                                     global const volatile enum enum_type* restrict globalconstvolatileenumenum_typerestrictp,\n"
+    "                                                     global const volatile typedef_enum_type * restrict globalconstvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_derived_p(local typedef_type*localtypedef_typep,\n"
+    "                            local struct struct_type *localstructstruct_typep,\n"
+    "                            local typedef_struct_type* localtypedef_struct_typep,\n"
+    "                            local union union_type * localunionunion_typep,\n"
+    "                            local typedef_union_type*localtypedef_union_typep,\n"
+    "                            local enum enum_type *localenumenum_typep,\n"
+    "                            local typedef_enum_type* localtypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_derived_restrict_p(local typedef_type * restrict localtypedef_typerestrictp,\n"
+    "                                     local struct struct_type*restrict localstructstruct_typerestrictp,\n"
+    "                                     local typedef_struct_type *restrict localtypedef_struct_typerestrictp,\n"
+    "                                     local union union_type* restrict localunionunion_typerestrictp,\n"
+    "                                     local typedef_union_type * restrict localtypedef_union_typerestrictp,\n"
+    "                                     local enum enum_type*restrict localenumenum_typerestrictp,\n"
+    "                                     local typedef_enum_type *restrict localtypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_derived_p(local const typedef_type* localconsttypedef_typep,\n"
+    "                                  local const struct struct_type * localconststructstruct_typep,\n"
+    "                                  local const typedef_struct_type*localconsttypedef_struct_typep,\n"
+    "                                  local const union union_type *localconstunionunion_typep,\n"
+    "                                  local const typedef_union_type* localconsttypedef_union_typep,\n"
+    "                                  local const enum enum_type * localconstenumenum_typep,\n"
+    "                                  local const typedef_enum_type*localconsttypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_derived_restrict_p(local const typedef_type *restrict localconsttypedef_typerestrictp,\n"
+    "                                           local const struct struct_type* restrict localconststructstruct_typerestrictp,\n"
+    "                                           local const typedef_struct_type * restrict localconsttypedef_struct_typerestrictp,\n"
+    "                                           local const union union_type*restrict localconstunionunion_typerestrictp,\n"
+    "                                           local const typedef_union_type *restrict localconsttypedef_union_typerestrictp,\n"
+    "                                           local const enum enum_type* restrict localconstenumenum_typerestrictp,\n"
+    "                                           local const typedef_enum_type * restrict localconsttypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_derived_p(local volatile typedef_type*localvolatiletypedef_typep,\n"
+    "                                     local volatile struct struct_type *localvolatilestructstruct_typep,\n"
+    "                                     local volatile typedef_struct_type* localvolatiletypedef_struct_typep,\n"
+    "                                     local volatile union union_type * localvolatileunionunion_typep,\n"
+    "                                     local volatile typedef_union_type*localvolatiletypedef_union_typep,\n"
+    "                                     local volatile enum enum_type *localvolatileenumenum_typep,\n"
+    "                                     local volatile typedef_enum_type* localvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_volatile_derived_restrict_p(local volatile typedef_type * restrict localvolatiletypedef_typerestrictp,\n"
+    "                                              local volatile struct struct_type*restrict localvolatilestructstruct_typerestrictp,\n"
+    "                                              local volatile typedef_struct_type *restrict localvolatiletypedef_struct_typerestrictp,\n"
+    "                                              local volatile union union_type* restrict localvolatileunionunion_typerestrictp,\n"
+    "                                              local volatile typedef_union_type * restrict localvolatiletypedef_union_typerestrictp,\n"
+    "                                              local volatile enum enum_type*restrict localvolatileenumenum_typerestrictp,\n"
+    "                                              local volatile typedef_enum_type *restrict localvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_derived_p(local const volatile typedef_type* localconstvolatiletypedef_typep,\n"
+    "                                           local const volatile struct struct_type * localconstvolatilestructstruct_typep,\n"
+    "                                           local const volatile typedef_struct_type*localconstvolatiletypedef_struct_typep,\n"
+    "                                           local const volatile union union_type *localconstvolatileunionunion_typep,\n"
+    "                                           local const volatile typedef_union_type* localconstvolatiletypedef_union_typep,\n"
+    "                                           local const volatile enum enum_type * localconstvolatileenumenum_typep,\n"
+    "                                           local const volatile typedef_enum_type*localconstvolatiletypedef_enum_typep)\n"
+    "{}\n",
+    "\n"
+    "kernel void local_const_volatile_derived_restrict_p(local const volatile typedef_type *restrict localconstvolatiletypedef_typerestrictp,\n"
+    "                                                    local const volatile struct struct_type* restrict localconstvolatilestructstruct_typerestrictp,\n"
+    "                                                    local const volatile typedef_struct_type * restrict localconstvolatiletypedef_struct_typerestrictp,\n"
+    "                                                    local const volatile union union_type*restrict localconstvolatileunionunion_typerestrictp,\n"
+    "                                                    local const volatile typedef_union_type *restrict localconstvolatiletypedef_union_typerestrictp,\n"
+    "                                                    local const volatile enum enum_type* restrict localconstvolatileenumenum_typerestrictp,\n"
+    "                                                    local const volatile typedef_enum_type * restrict localconstvolatiletypedef_enum_typerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void derived_d(typedef_type typedef_typed,\n"
+    "                      struct struct_type structstruct_typed,\n"
+    "                      typedef_struct_type typedef_struct_typed,\n"
+    "                      union union_type unionunion_typed,\n"
+    "                      typedef_union_type typedef_union_typed,\n"
+    "                      enum enum_type enumenum_typed,\n"
+    "                      typedef_enum_type typedef_enum_typed)\n"
+    "{}\n",
+    "\n"
+    "kernel void const_derived_d(const typedef_type consttypedef_typed,\n"
+    "                            const struct struct_type conststructstruct_typed,\n"
+    "                            const typedef_struct_type consttypedef_struct_typed,\n"
+    "                            const union union_type constunionunion_typed,\n"
+    "                            const typedef_union_type consttypedef_union_typed,\n"
+    "                            const enum enum_type constenumenum_typed,\n"
+    "                            const typedef_enum_type consttypedef_enum_typed)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_derived_d(private typedef_type privatetypedef_typed,\n"
+    "                              private struct struct_type privatestructstruct_typed,\n"
+    "                              private typedef_struct_type privatetypedef_struct_typed,\n"
+    "                              private union union_type privateunionunion_typed,\n"
+    "                              private typedef_union_type privatetypedef_union_typed,\n"
+    "                              private enum enum_type privateenumenum_typed,\n"
+    "                              private typedef_enum_type privatetypedef_enum_typed)\n"
+    "{}\n",
+    "\n"
+    "kernel void private_const_derived_d(private const typedef_type privateconsttypedef_typed,\n"
+    "                                    private const struct struct_type privateconststructstruct_typed,\n"
+    "                                    private const typedef_struct_type privateconsttypedef_struct_typed,\n"
+    "                                    private const union union_type privateconstunionunion_typed,\n"
+    "                                    private const typedef_union_type privateconsttypedef_union_typed,\n"
+    "                                    private const enum enum_type privateconstenumenum_typed,\n"
+    "                                    private const typedef_enum_type privateconsttypedef_enum_typed)\n"
+    "{}\n",
+    "\n"
+static const char * required_arg_info[][72] = {
+  // The minimum value of CL_DEVICE_MAX_CONSTANT_ARGS is 4
+    {
+        "constant_scalar_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "constantvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "constantcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantunsignedcharp",
+    NULL
+  },
+  {
+    "constant_scalar_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "constantshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "constantintp",
+    NULL
+  },
+  {
+    "constant_scalar_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "constantlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantulongp",
+    NULL
+  },
+  {
+    "constant_scalar_p3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "constantfloatp",
+        NULL
+    },
+    {
+        "constant_scalar_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "constantvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "constantcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantunsignedcharrestrictp",
+    NULL
+  },
+  {
+    "constant_scalar_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "constantshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "constantushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "constantunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "constantintrestrictp",
+    NULL
+  },
+  {
+    "constant_scalar_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "constantlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantulongrestrictp",
+    NULL
+  },
+  {
+    "constant_scalar_restrict_p3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "constantfloatrestrictp",
+        NULL
+    },
+    {
+        "global_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "globalvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char*", "globalcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "globalshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "globalintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globaluintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globalunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "globallongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "globalfloatp",
+        NULL
+    },
+    {
+        "global_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globaluintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globallongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalfloatrestrictp",
+        NULL
+    },
+    {
+        "global_const_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "globalconstvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "globalconstcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "globalconstshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "globalconstushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "globalconstunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "globalconstintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "globalconstlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "globalconstfloatp",
+        NULL
+    },
+    {
+        "global_const_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalconstshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalconstfloatrestrictp",
+        NULL
+    },
+    {
+        "global_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalvolatilefloatp",
+        NULL
+    },
+    {
+        "global_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalvolatileunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalvolatilefloatrestrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalconstvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalconstvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalconstvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalconstvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalconstvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalconstvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalconstvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalconstvolatilefloatp",
+        NULL
+    },
+    {
+        "global_const_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalconstvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstvolatileunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalconstvolatilefloatrestrictp",
+        NULL
+    },
+    {
+        "local_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "localvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char*", "localcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "localshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "localintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "locallongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "localfloatp",
+        NULL
+    },
+    {
+        "local_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "locallongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localfloatrestrictp",
+        NULL
+    },
+    {
+        "local_const_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "localconstvoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "localconstcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "localconstshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "localconstintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "localconstlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "localconstulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "localconstunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "localconstfloatp",
+        NULL
+    },
+    {
+        "local_const_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localconstintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localconstfloatrestrictp",
+        NULL
+    },
+    {
+        "local_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localvolatilefloatp",
+        NULL
+    },
+    {
+        "local_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localvolatileunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localvolatilefloatrestrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localconstvolatilevoidp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localconstvolatilecharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localconstvolatileucharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localconstvolatileunsignedcharp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localconstvolatileshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileushortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileunsignedshortp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localconstvolatileintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileuintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileunsignedintp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localconstvolatilelongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localconstvolatileulongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localconstvolatileunsignedlongp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localconstvolatilefloatp",
+        NULL
+    },
+    {
+        "local_const_volatile_scalar_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvolatilevoidrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstvolatilecharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileucharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileunsignedcharrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstvolatileshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileushortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileunsignedshortrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localconstvolatileintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileuintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileunsignedintrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstvolatilelongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileulongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileunsignedlongrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localconstvolatilefloatrestrictp",
+        NULL
+    },
+    {
+        "scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "chard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "uchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "unsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "shortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "ushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "unsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "intd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "uintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "unsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "longd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "ulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "unsignedlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "floatd",
+        NULL
+    },
+    {
+        "const_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "constchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constuchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constunsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "constshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constunsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "constintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constuintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constunsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "constlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constunsignedlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "constfloatd",
+        NULL
+    },
+    {
+        "private_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "privatechard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateuchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateunsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateunsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateuintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateunsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privatelongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateunsignedlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privatefloatd",
+        NULL
+    },
+    {
+        "private_const_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "privateconstchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstuchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstunsignedchard",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateconstshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstushortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstunsignedshortd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateconstintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstuintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstunsignedintd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privateconstlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstulongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstunsignedlongd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privateconstfloatd",
+        NULL
+    },
+    {
+        "constant_vector2_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "constantchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "constantuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "constantshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "constantushort2p",
+    NULL
+    },
+    {
+        "constant_vector2_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "constantint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "constantuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "constantlong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "constantulong2p",
+    NULL
+    },
+    {
+        "constant_vector2_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "constantfloat2p",
+        NULL
+    },
+    {
+        "constant_vector2_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "constantchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "constantuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "constantshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "constantushort2restrictp",
+    NULL
+    },
+    {
+        "constant_vector2_restrict_p1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "constantint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "constantuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "constantlong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "constantulong2restrictp",
+    NULL
+    },
+    {
+        "constant_vector2_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "constantfloat2restrictp",
+        NULL
+    },
+    {
+        "global_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "globalchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "globaluchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "globalshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "globalushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2*", "globalint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "globaluint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "globallong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", "globalulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "globalfloat2p",
+        NULL
+    },
+    {
+        "global_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globaluchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globaluint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globallong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalfloat2restrictp",
+        NULL
+    },
+    {
+        "global_const_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "globalconstchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "globalconstuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "globalconstshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "globalconstushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "globalconstint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "globalconstuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "globalconstlong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "globalconstulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "globalconstfloat2p",
+        NULL
+    },
+    {
+        "global_const_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalconstushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstlong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstfloat2restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "globalvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "globalvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalvolatileulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalvolatilefloat2p",
+        NULL
+    },
+    {
+        "global_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalvolatileulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalconstvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "globalconstvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalconstvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "globalconstvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalconstvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalconstvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalconstvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalconstvolatileulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalconstvolatilefloat2p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalconstvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstvolatileulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "local_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "localchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "localuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "localshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "localushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2*", "localint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "localuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "locallong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", "localulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "localfloat2p",
+        NULL
+    },
+    {
+        "local_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "locallong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localfloat2restrictp",
+        NULL
+    },
+    {
+        "local_const_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "localconstchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "localconstuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "localconstshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "localconstushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "localconstint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "localconstuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "localconstlong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "localconstulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "localconstfloat2p",
+        NULL
+    },
+    {
+        "local_const_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localconstchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstlong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstfloat2restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "localvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "localvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localvolatileulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localvolatilefloat2p",
+        NULL
+    },
+    {
+        "local_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localvolatileulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localconstvolatilechar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "localconstvolatileuchar2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "localconstvolatileshort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localconstvolatileushort2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localconstvolatileint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localconstvolatileuint2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localconstvolatilelong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localconstvolatileulong2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localconstvolatilefloat2p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector2_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localconstvolatilechar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstvolatileuchar2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstvolatileshort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstvolatileushort2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstvolatileint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstvolatileuint2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstvolatilelong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstvolatileulong2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstvolatilefloat2restrictp",
+        NULL
+    },
+    {
+        "vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "char2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "uchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "short2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "ushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "int2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "uint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "long2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "ulong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "float2d",
+        NULL
+    },
+    {
+        "const_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "constchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "constuchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "constshort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "constushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "constint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "constuint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "constlong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "constulong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "constfloat2d",
+        NULL
+    },
+    {
+        "private_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privatechar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "privateuchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateshort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateuint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privatelong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "privateulong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privatefloat2d",
+        NULL
+    },
+    {
+        "private_const_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privateconstchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "privateconstuchar2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateconstshort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateconstushort2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateconstint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateconstuint2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privateconstlong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "privateconstulong2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privateconstfloat2d",
+        NULL
+    },
+    {
+        "constant_vector3_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "constantchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "constantuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "constantshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "constantushort3p",
+        NULL
+    },
+    {
+        "constant_vector3_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "constantint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "constantuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "constantlong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "constantulong3p",
+    NULL
+    },
+    {
+        "constant_vector3_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "constantfloat3p",
+        NULL
+    },
+    {
+        "constant_vector3_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "constantchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "constantuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "constantshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "constantushort3restrictp",
+        NULL
+    },
+    {
+        "constant_vector3_restrict_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "constantint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "constantuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "constantlong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "constantulong3restrictp",
+    NULL
+    },
+    {
+        "constant_vector3_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "constantfloat3restrictp",
+        NULL
+    },
+    {
+        "global_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "globalchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "globaluchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "globalshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "globalushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "globalint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "globaluint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "globallong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "globalulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "globalfloat3p",
+        NULL
+    },
+    {
+        "global_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globaluchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globaluint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globallong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalfloat3restrictp",
+        NULL
+    },
+    {
+        "global_const_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "globalconstchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "globalconstuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "globalconstshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "globalconstushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "globalconstint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "globalconstuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "globalconstlong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "globalconstulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "globalconstfloat3p",
+        NULL
+    },
+    {
+        "global_const_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalconstshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstlong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstfloat3restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "globalvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalvolatileulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalvolatilefloat3p",
+        NULL
+    },
+    {
+        "global_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalvolatileulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalconstvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalconstvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalconstvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalconstvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "globalconstvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalconstvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalconstvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalconstvolatileulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalconstvolatilefloat3p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalconstvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstvolatileulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "local_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "localchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "localuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "localshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "localushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "localint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "localuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "locallong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "localulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "localfloat3p",
+        NULL
+    },
+    {
+        "local_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "locallong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localfloat3restrictp",
+        NULL
+    },
+    {
+        "local_const_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "localconstchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "localconstuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "localconstshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "localconstushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "localconstint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "localconstuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "localconstlong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "localconstulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "localconstfloat3p",
+        NULL
+    },
+    {
+        "local_const_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localconstuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstlong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstfloat3restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localvolatileulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "localvolatilefloat3p",
+        NULL
+    },
+    {
+        "local_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localvolatileulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localconstvolatilechar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localconstvolatileuchar3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localconstvolatileshort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localconstvolatileushort3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localconstvolatileint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localconstvolatileuint3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localconstvolatilelong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localconstvolatileulong3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "localconstvolatilefloat3p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector3_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstvolatilechar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstvolatileuchar3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstvolatileshort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstvolatileushort3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstvolatileint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localconstvolatileuint3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstvolatilelong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstvolatileulong3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstvolatilefloat3restrictp",
+        NULL
+    },
+    {
+        "vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "char3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "uchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "short3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "ushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "int3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "uint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "long3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "ulong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "float3d",
+        NULL
+    },
+    {
+        "const_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "constchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "constuchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "constshort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "constushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "constint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "constuint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "constlong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "constulong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "constfloat3d",
+        NULL
+    },
+    {
+        "private_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privatechar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateuchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateshort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateuint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privatelong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateulong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "privatefloat3d",
+        NULL
+    },
+    {
+        "private_const_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privateconstchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateconstuchar3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateconstshort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateconstushort3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateconstint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateconstuint3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privateconstlong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateconstulong3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "privateconstfloat3d",
+        NULL
+    },
+    {
+        "constant_vector4_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "constantchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "constantuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "constantshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "constantushort4p",
+        NULL
+    },
+    {
+        "constant_vector4_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "constantint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "constantuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "constantlong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "constantulong4p",
+        NULL
+    },
+    {
+        "constant_vector4_p2",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "constantfloat4p",
+        NULL
+    },
+    {
+        "constant_vector4_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "constantchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "constantuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "constantshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "constantushort4restrictp",
+        NULL
+    },
+    {
+        "constant_vector4_restrict_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "constantint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "constantuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "constantlong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "constantulong4restrictp",
+        NULL
+    },
+    {
+        "constant_vector4_restrict_p2",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "constantfloat4restrictp",
+        NULL
+    },
+    {
+        "global_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "globalchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "globaluchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "globalshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "globalushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "globalint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4*", "globaluint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "globallong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "globalulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "globalfloat4p",
+        NULL
+    },
+    {
+        "global_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globaluchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globaluint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globallong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalfloat4restrictp",
+        NULL
+    },
+    {
+        "global_const_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "globalconstchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "globalconstuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "globalconstshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "globalconstushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "globalconstint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "globalconstuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "globalconstlong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "globalconstulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "globalconstfloat4p",
+        NULL
+    },
+    {
+        "global_const_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalconstchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalconstint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstlong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstfloat4restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalvolatileulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalvolatilefloat4p",
+        NULL
+    },
+    {
+        "global_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalvolatileulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalconstvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalconstvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalconstvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalconstvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalconstvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalconstvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalconstvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalconstvolatileulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalconstvolatilefloat4p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalconstvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalconstvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstvolatileulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "local_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "localchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "localuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "localshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "localushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "localint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4*", "localuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "locallong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "localulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "localfloat4p",
+        NULL
+    },
+    {
+        "local_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "locallong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localfloat4restrictp",
+        NULL
+    },
+    {
+        "local_const_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "localconstchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "localconstuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "localconstshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "localconstushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "localconstint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "localconstuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "localconstlong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "localconstulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "localconstfloat4p",
+        NULL
+    },
+    {
+        "local_const_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localconstuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localconstlong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstfloat4restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "localvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localvolatileulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localvolatilefloat4p",
+        NULL
+    },
+    {
+        "local_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localvolatileulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localconstvolatilechar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localconstvolatileuchar4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localconstvolatileshort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "localconstvolatileushort4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localconstvolatileint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localconstvolatileuint4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localconstvolatilelong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localconstvolatileulong4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localconstvolatilefloat4p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector4_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstvolatilechar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localconstvolatileuchar4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstvolatileshort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstvolatileushort4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstvolatileint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstvolatileuint4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localconstvolatilelong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstvolatileulong4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstvolatilefloat4restrictp",
+        NULL
+    },
+    {
+        "vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "char4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "uchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "short4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "ushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "int4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "uint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "long4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "ulong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "float4d",
+        NULL
+    },
+    {
+        "const_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "constchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "constuchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "constshort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "constushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "constint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "constuint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "constlong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "constulong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "constfloat4d",
+        NULL
+    },
+    {
+        "private_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privatechar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateuchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateshort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateuint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privatelong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateulong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privatefloat4d",
+        NULL
+    },
+    {
+        "private_const_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privateconstchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateconstuchar4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateconstshort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateconstushort4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateconstint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateconstuint4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privateconstlong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateconstulong4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privateconstfloat4d",
+        NULL
+    },
+    {
+        "constant_vector8_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "constantchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "constantuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "constantshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "constantushort8p",
+        NULL
+    },
+    {
+        "constant_vector8_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "constantint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "constantuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "constantlong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "constantulong8p",
+    NULL
+    },
+    {
+        "constant_vector8_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "constantfloat8p",
+        NULL
+    },
+    {
+        "constant_vector8_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "constantchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "constantuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "constantshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "constantushort8restrictp",
+        NULL
+    },
+    {
+        "constant_vector8_restrict_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "constantint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "constantuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "constantlong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "constantulong8restrictp",
+    NULL
+    },
+    {
+        "constant_vector8_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "constantfloat8restrictp",
+        NULL
+    },
+    {
+        "global_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "globalchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8*", "globaluchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "globalshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "globalushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "globalint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "globaluint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "globallong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "globalulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "globalfloat8p",
+        NULL
+    },
+    {
+        "global_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globaluchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globaluint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globallong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalfloat8restrictp",
+        NULL
+    },
+    {
+        "global_const_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "globalconstchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "globalconstuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "globalconstshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "globalconstushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "globalconstint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "globalconstuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "globalconstlong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "globalconstulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "globalconstfloat8p",
+        NULL
+    },
+    {
+        "global_const_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalconstchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalconstuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalconstushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstlong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalconstulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstfloat8restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "globalvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalvolatileulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalvolatilefloat8p",
+        NULL
+    },
+    {
+        "global_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalvolatileulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalconstvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "globalconstvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalconstvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalconstvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalconstvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalconstvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalconstvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalconstvolatileulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalconstvolatilefloat8p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalconstvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalconstvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalconstvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalconstvolatileulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "local_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "localchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8*", "localuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "localshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "localushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "localint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "localuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "locallong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "localulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "localfloat8p",
+        NULL
+    },
+    {
+        "local_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "locallong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localfloat8restrictp",
+        NULL
+    },
+    {
+        "local_const_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "localconstchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "localconstuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "localconstshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "localconstushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "localconstint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "localconstuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "localconstlong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "localconstulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "localconstfloat8p",
+        NULL
+    },
+    {
+        "local_const_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localconstchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstlong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localconstulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstfloat8restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "localvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localvolatileulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localvolatilefloat8p",
+        NULL
+    },
+    {
+        "local_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localvolatileulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localconstvolatilechar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "localconstvolatileuchar8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localconstvolatileshort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localconstvolatileushort8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localconstvolatileint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localconstvolatileuint8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localconstvolatilelong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localconstvolatileulong8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localconstvolatilefloat8p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector8_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localconstvolatilechar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstvolatileuchar8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstvolatileshort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstvolatileushort8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstvolatileint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstvolatileuint8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstvolatilelong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localconstvolatileulong8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstvolatilefloat8restrictp",
+        NULL
+    },
+    {
+        "vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "char8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "uchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "short8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "ushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "int8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "uint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "long8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "ulong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "float8d",
+        NULL
+    },
+    {
+        "const_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "constchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "constuchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "constshort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "constushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "constint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "constuint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "constlong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "constulong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "constfloat8d",
+        NULL
+    },
+    {
+        "private_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privatechar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateuchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "privateshort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateuint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "privatelong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateulong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privatefloat8d",
+        NULL
+    },
+    {
+        "private_const_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privateconstchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateconstuchar8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "privateconstshort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateconstushort8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateconstint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateconstuint8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "privateconstlong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateconstulong8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privateconstfloat8d",
+        NULL
+    },
+    {
+        "constant_vector16_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "constantchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "constantuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "constantshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "constantushort16p",
+        NULL
+    },
+    {
+        "constant_vector16_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "constantint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "constantuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "constantlong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "constantulong16p",
+    NULL
+    },
+    {
+        "constant_vector16_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "constantfloat16p",
+        NULL
+    },
+    {
+        "constant_vector16_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "constantchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "constantuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "constantshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "constantushort16restrictp",
+        NULL
+    },
+    {
+        "constant_vector16_restrict_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "constantint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "constantuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "constantlong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "constantulong16restrictp",
+    NULL
+    },
+    {
+        "constant_vector16_restrict_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "constantfloat16restrictp",
+        NULL
+    },
+    {
+        "global_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "globalchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "globaluchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "globalshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "globalushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "globalint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "globaluint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "globallong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "globalulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "globalfloat16p",
+        NULL
+    },
+    {
+        "global_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globaluchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globaluint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globallong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalfloat16restrictp",
+        NULL
+    },
+    {
+        "global_const_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "globalconstchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "globalconstuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "globalconstshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "globalconstushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "globalconstint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "globalconstuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "globalconstlong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "globalconstulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "globalconstfloat16p",
+        NULL
+    },
+    {
+        "global_const_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstlong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstfloat16restrictp",
+        NULL
+    },
+    {
+        "global_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "globalvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "globalvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "globalvolatileulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "globalvolatilefloat16p",
+        NULL
+    },
+    {
+        "global_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalvolatileulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "globalconstvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalconstvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalconstvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalconstvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalconstvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalconstvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "globalconstvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "globalconstvolatileulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "globalconstvolatilefloat16p",
+        NULL
+    },
+    {
+        "global_const_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstvolatileulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "local_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "localchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "localuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "localshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "localushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "localint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "localuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "locallong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "localulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "localfloat16p",
+        NULL
+    },
+    {
+        "local_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "locallong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localfloat16restrictp",
+        NULL
+    },
+    {
+        "local_const_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "localconstchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "localconstuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "localconstshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "localconstushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "localconstint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "localconstuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "localconstlong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "localconstulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "localconstfloat16p",
+        NULL
+    },
+    {
+        "local_const_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstlong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstfloat16restrictp",
+        NULL
+    },
+    {
+        "local_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "localvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localvolatileulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localvolatilefloat16p",
+        NULL
+    },
+    {
+        "local_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localvolatileulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localconstvolatilechar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localconstvolatileuchar16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localconstvolatileshort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localconstvolatileushort16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localconstvolatileint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "localconstvolatileuint16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localconstvolatilelong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localconstvolatileulong16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localconstvolatilefloat16p",
+        NULL
+    },
+    {
+        "local_const_volatile_vector16_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstvolatilechar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstvolatileuchar16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstvolatileshort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstvolatileushort16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstvolatileint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstvolatileuint16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstvolatilelong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstvolatileulong16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstvolatilefloat16restrictp",
+        NULL
+    },
+    {
+        "vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "char16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "uchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "short16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "ushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "int16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "uint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "long16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "ulong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "float16d",
+        NULL
+    },
+    {
+        "const_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "constchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "constuchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "constshort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "constushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "constint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "constuint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "constlong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "constulong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "constfloat16d",
+        NULL
+    },
+    {
+        "private_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privatechar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateuchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateshort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateuint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privatelong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateulong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privatefloat16d",
+        NULL
+    },
+    {
+        "private_const_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privateconstchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateconstuchar16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateconstshort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateconstushort16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateconstint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateconstuint16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privateconstlong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateconstulong16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privateconstfloat16d",
+        NULL
+    },
+    {
+        "constant_derived_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "constanttypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "constantstructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "constanttypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "constantunionunion_typep",
+        NULL
+    },
+    {
+        "constant_derived_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "constanttypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "constantenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "constanttypedef_enum_typep",
+        NULL
+    },
+    {
+        "constant_derived_restrict_p0",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "constanttypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "constantstructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "constanttypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "constantunionunion_typerestrictp",
+        NULL
+    },
+    {
+        "constant_derived_restrict_p1",
+    (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "constanttypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "constantenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "constanttypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "globaltypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "globalstructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "globaltypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type*", "globalunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "globaltypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "globalenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "globaltypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globaltypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalstructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globaltypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globaltypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globaltypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_const_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "globalconsttypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "globalconststructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "globalconsttypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "globalconstunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "globalconsttypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "globalconstenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "globalconsttypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_const_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconsttypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconststructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconsttypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalconsttypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconsttypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "globalvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "global_const_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalconstvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalconstvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalconstvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalconstvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "globalconstvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "global_const_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconstvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconstvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalconstvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "localtypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "localstructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "localtypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type*", "localunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "localtypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "localenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "localtypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localtypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localstructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localtypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localtypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localtypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_const_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "localconsttypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "localconststructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "localconsttypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "localconstunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "localconsttypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "localconstenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "localconsttypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_const_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconsttypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconststructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconsttypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localconstunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconsttypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localconsttypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "localvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "local_const_volatile_derived_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "localconstvolatiletypedef_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localconstvolatilestructstruct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localconstvolatiletypedef_struct_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localconstvolatileunionunion_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localconstvolatiletypedef_union_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localconstvolatileenumenum_typep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localconstvolatiletypedef_enum_typep",
+        NULL
+    },
+    {
+        "local_const_volatile_derived_restrict_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconstvolatiletypedef_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconstvolatilestructstruct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconstvolatiletypedef_struct_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localconstvolatileunionunion_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconstvolatiletypedef_union_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstvolatileenumenum_typerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localconstvolatiletypedef_enum_typerestrictp",
+        NULL
+    },
+    {
+        "derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "typedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "structstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "typedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "unionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "typedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "enumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "typedef_enum_typed",
+        NULL
+    },
+    {
+        "const_derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "consttypedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "conststructstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "consttypedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "constunionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "consttypedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "constenumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "consttypedef_enum_typed",
+        NULL
+    },
+    {
+        "private_derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privatetypedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privatestructstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privatetypedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateunionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privatetypedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateenumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privatetypedef_enum_typed",
+        NULL
+    },
+    {
+        "private_const_derived_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privateconsttypedef_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privateconststructstruct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privateconsttypedef_struct_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateconstunionunion_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privateconsttypedef_union_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateconstenumenum_typed",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privateconsttypedef_enum_typed",
+        NULL
+    },
+// Optional image support: OpenCL C source for one empty kernel, image_d, that takes a read_only and a write_only parameter of each of six image types, followed by one sampler_t.
+static const char * image_kernel_args[] = { // adjacent string literals concatenate; only a comma starts a new array element
+    "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable\n" // presumably needed for the write_only image3d_t parameter below -- TODO confirm
+    "kernel void image_d(read_only image2d_t image2d_td0,\n" // each parameter name is its type name + "d" + its zero-based position
+    "                    write_only image2d_t image2d_td1,\n"
+    "                    read_only image3d_t image3d_td2,\n"
+    "                    write_only image3d_t image3d_td3,\n"
+    "                    read_only image2d_array_t image2d_array_td4,\n"
+    "                    write_only image2d_array_t image2d_array_td5,\n"
+    "                    read_only image1d_t image1d_td6,\n"
+    "                    write_only image1d_t image1d_td7,\n"
+    "                    read_only image1d_buffer_t image1d_buffer_td8,\n"
+    "                    write_only image1d_buffer_t image1d_buffer_td9,\n"
+    "                    read_only image1d_array_t image1d_array_td10,\n"
+    "                    write_only image1d_array_t image1d_array_td11,\n"
+    "                    sampler_t sampler_td12)\n" // the sampler carries no access qualifier
+    "{}\n", // empty kernel body; the comma ends the first array element
+    "\n" // second array element: a single newline
+static const char * image_arg_info[][67] = { // row layout: kernel name, 5 fields per argument, NULL; image_d has 13 arguments, so 1 + 13*5 + 1 = 67
+    {
+        "image_d", // kernel name; same name as the kernel declared in image_kernel_args
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td0", // fields: address qualifier, access qualifier, type qualifier, type name, argument name; CL_* constants are cast to const char * to sit in the string-pointer table
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td1",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td3",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td4",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td5",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td6",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td7",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td8",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td9",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td10",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td11",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "sampler_t", "sampler_td12", // the sampler is the only entry with a PRIVATE address qualifier and ACCESS_NONE
+        NULL // presumably the end-of-row sentinel for whatever walks this table -- TODO confirm against the consumer
+    },
+// Support for the optional double data type.
+//
+// OpenCL C kernel sources whose arguments use double and its vector forms
+// (double2, double3, double4, double8, double16). Each type gets three
+// empty-bodied kernels:
+//   *_p   constant- and global-qualified pointer arguments
+//   *_p2  the remaining global pointer argument plus local-qualified pointers
+//   *_d   by-value (private) arguments
+//
+// Array element boundaries fall after each "{}\n", line. Every element after
+// the first therefore begins with the "\n" literal that follows that comma,
+// and the final element is a lone "\n".
+static const char * double_kernel_args[] = {
+    "kernel void double_scalar_p(constant double*constantdoublep,\n"
+    "                            constant double *restrict constantdoublerestrictp,\n"
+    "                            global double*globaldoublep,\n"
+    "                            global double *restrict globaldoublerestrictp,\n"
+    "                            global const double* globalconstdoublep,\n"
+    "                            global const double * restrict globalconstdoublerestrictp,\n"
+    "                            global volatile double*globalvolatiledoublep,\n"
+    "                            global volatile double *restrict globalvolatiledoublerestrictp,\n"
+    "                            global const volatile double* globalconstvolatiledoublep)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_scalar_p2(global const volatile double * restrict globalconstvolatiledoublerestrictp,\n"
+    "                             local double*localdoublep,\n"
+    "                             local double *restrict localdoublerestrictp,\n"
+    "                             local const double* localconstdoublep,\n"
+    "                             local const double * restrict localconstdoublerestrictp,\n"
+    "                             local volatile double*localvolatiledoublep,\n"
+    "                             local volatile double *restrict localvolatiledoublerestrictp,\n"
+    "                             local const volatile double* localconstvolatiledoublep,\n"
+    "                             local const volatile double * restrict localconstvolatiledoublerestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_scalar_d(double doubled,\n"
+    "                            const double constdoubled,\n"
+    "                            private double privatedoubled,\n"
+    "                            private const double privateconstdoubled)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector2_p(constant double2*constantdouble2p,\n"
+    "                             constant double2 *restrict constantdouble2restrictp,\n"
+    "                             global double2*globaldouble2p,\n"
+    "                             global double2 *restrict globaldouble2restrictp,\n"
+    "                             global const double2* globalconstdouble2p,\n"
+    "                             global const double2 * restrict globalconstdouble2restrictp,\n"
+    "                             global volatile double2*globalvolatiledouble2p,\n"
+    "                             global volatile double2 *restrict globalvolatiledouble2restrictp,\n"
+    "                             global const volatile double2* globalconstvolatiledouble2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector2_p2(global const volatile double2 * restrict globalconstvolatiledouble2restrictp,\n"
+    "                              local double2*localdouble2p,\n"
+    "                              local double2 *restrict localdouble2restrictp,\n"
+    "                              local const double2* localconstdouble2p,\n"
+    "                              local const double2 * restrict localconstdouble2restrictp,\n"
+    "                              local volatile double2*localvolatiledouble2p,\n"
+    "                              local volatile double2 *restrict localvolatiledouble2restrictp,\n"
+    "                              local const volatile double2* localconstvolatiledouble2p,\n"
+    "                              local const volatile double2 * restrict localconstvolatiledouble2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector2_d(double2 double2d,\n"
+    "                             const double2 constdouble2d,\n"
+    "                             private double2 privatedouble2d,\n"
+    "                             private const double2 privateconstdouble2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector3_p(constant double3*constantdouble3p,\n"
+    "                             constant double3 *restrict constantdouble3restrictp,\n"
+    "                             global double3*globaldouble3p,\n"
+    "                             global double3 *restrict globaldouble3restrictp,\n"
+    "                             global const double3* globalconstdouble3p,\n"
+    "                             global const double3 * restrict globalconstdouble3restrictp,\n"
+    "                             global volatile double3*globalvolatiledouble3p,\n"
+    "                             global volatile double3 *restrict globalvolatiledouble3restrictp,\n"
+    "                             global const volatile double3* globalconstvolatiledouble3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector3_p2(global const volatile double3 * restrict globalconstvolatiledouble3restrictp,\n"
+    "                              local double3*localdouble3p,\n"
+    "                              local double3 *restrict localdouble3restrictp,\n"
+    "                              local const double3* localconstdouble3p,\n"
+    "                              local const double3 * restrict localconstdouble3restrictp,\n"
+    "                              local volatile double3*localvolatiledouble3p,\n"
+    "                              local volatile double3 *restrict localvolatiledouble3restrictp,\n"
+    "                              local const volatile double3* localconstvolatiledouble3p,\n"
+    "                              local const volatile double3 * restrict localconstvolatiledouble3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector3_d(double3 double3d,\n"
+    "                             const double3 constdouble3d,\n"
+    "                             private double3 privatedouble3d,\n"
+    "                             private const double3 privateconstdouble3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector4_p(constant double4*constantdouble4p,\n"
+    "                             constant double4 *restrict constantdouble4restrictp,\n"
+    "                             global double4*globaldouble4p,\n"
+    "                             global double4 *restrict globaldouble4restrictp,\n"
+    "                             global const double4* globalconstdouble4p,\n"
+    "                             global const double4 * restrict globalconstdouble4restrictp,\n"
+    "                             global volatile double4*globalvolatiledouble4p,\n"
+    "                             global volatile double4 *restrict globalvolatiledouble4restrictp,\n"
+    "                             global const volatile double4* globalconstvolatiledouble4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector4_p2(global const volatile double4 * restrict globalconstvolatiledouble4restrictp,\n"
+    "                              local double4*localdouble4p,\n"
+    "                              local double4 *restrict localdouble4restrictp,\n"
+    "                              local const double4* localconstdouble4p,\n"
+    "                              local const double4 * restrict localconstdouble4restrictp,\n"
+    "                              local volatile double4*localvolatiledouble4p,\n"
+    "                              local volatile double4 *restrict localvolatiledouble4restrictp,\n"
+    "                              local const volatile double4* localconstvolatiledouble4p,\n"
+    "                              local const volatile double4 * restrict localconstvolatiledouble4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector4_d(double4 double4d,\n"
+    "                             const double4 constdouble4d,\n"
+    "                             private double4 privatedouble4d,\n"
+    "                             private const double4 privateconstdouble4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector8_p(constant double8*constantdouble8p,\n"
+    "                             constant double8 *restrict constantdouble8restrictp,\n"
+    "                             global double8*globaldouble8p,\n"
+    "                             global double8 *restrict globaldouble8restrictp,\n"
+    "                             global const double8* globalconstdouble8p,\n"
+    "                             global const double8 * restrict globalconstdouble8restrictp,\n"
+    "                             global volatile double8*globalvolatiledouble8p,\n"
+    "                             global volatile double8 *restrict globalvolatiledouble8restrictp,\n"
+    "                             global const volatile double8* globalconstvolatiledouble8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector8_p2(global const volatile double8 * restrict globalconstvolatiledouble8restrictp,\n"
+    "                              local double8*localdouble8p,\n"
+    "                              local double8 *restrict localdouble8restrictp,\n"
+    "                              local const double8* localconstdouble8p,\n"
+    "                              local const double8 * restrict localconstdouble8restrictp,\n"
+    "                              local volatile double8*localvolatiledouble8p,\n"
+    "                              local volatile double8 *restrict localvolatiledouble8restrictp,\n"
+    "                              local const volatile double8* localconstvolatiledouble8p,\n"
+    "                              local const volatile double8 * restrict localconstvolatiledouble8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector8_d(double8 double8d,\n"
+    "                             const double8 constdouble8d,\n"
+    "                             private double8 privatedouble8d,\n"
+    "                             private const double8 privateconstdouble8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector16_p(constant double16*constantdouble16p,\n"
+    "                              constant double16 *restrict constantdouble16restrictp,\n"
+    "                              global double16*globaldouble16p,\n"
+    "                              global double16 *restrict globaldouble16restrictp,\n"
+    "                              global const double16* globalconstdouble16p,\n"
+    "                              global const double16 * restrict globalconstdouble16restrictp,\n"
+    "                              global volatile double16*globalvolatiledouble16p,\n"
+    "                              global volatile double16 *restrict globalvolatiledouble16restrictp,\n"
+    "                              global const volatile double16* globalconstvolatiledouble16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector16_p2(global const volatile double16 * restrict globalconstvolatiledouble16restrictp,\n"
+    "                               local double16*localdouble16p,\n"
+    "                               local double16 *restrict localdouble16restrictp,\n"
+    "                               local const double16* localconstdouble16p,\n"
+    "                               local const double16 * restrict localconstdouble16restrictp,\n"
+    "                               local volatile double16*localvolatiledouble16p,\n"
+    "                               local volatile double16 *restrict localvolatiledouble16restrictp,\n"
+    "                               local const volatile double16* localconstvolatiledouble16p,\n"
+    "                               local const volatile double16 * restrict localconstvolatiledouble16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void double_vector16_d(double16 double16d,\n"
+    "                              const double16 constdouble16d,\n"
+    "                              private double16 privatedouble16d,\n"
+    "                              private const double16 privateconstdouble16d)\n"
+    "{}\n",
+    "\n"
+};
+static const char * double_arg_info[][77] = {
+    {
+        "double_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "constantdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "constantdoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double*", "globaldoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globaldoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "globalconstdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstdoublerestrictp",
+    (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalvolatiledoublep",
+    (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalvolatiledoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalconstvolatiledoublep",
+        NULL
+    },
+    {
+        "double_scalar_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstvolatiledoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double*", "localdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localdoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "localconstdoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstdoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localvolatiledoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localvolatiledoublerestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localconstvolatiledoublep",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstvolatiledoublerestrictp",
+        NULL
+    },
+    {
+        "double_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "doubled",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "constdoubled",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "privatedoubled",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "privateconstdoubled",
+        NULL
+    },
+    {
+        "double_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "constantdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "constantdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "globaldouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globaldouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "globalconstdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalvolatiledouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalvolatiledouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalconstvolatiledouble2p",
+        NULL
+    },
+    {
+        "double_vector2_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstvolatiledouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "localdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "localconstdouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstdouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localvolatiledouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localvolatiledouble2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localconstvolatiledouble2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstvolatiledouble2restrictp",
+        NULL
+    },
+    {
+        "double_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "double2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "constdouble2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privatedouble2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privateconstdouble2d",
+        NULL
+    },
+    {
+        "double_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "constantdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "constantdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", "globaldouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globaldouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "globalconstdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalvolatiledouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalvolatiledouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalconstvolatiledouble3p",
+        NULL
+    },
+    {
+        "double_vector3_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstvolatiledouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", "localdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "localconstdouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstdouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localvolatiledouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localvolatiledouble3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localconstvolatiledouble3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstvolatiledouble3restrictp",
+        NULL
+    },
+    {
+        "double_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "double3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "constdouble3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "privatedouble3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "privateconstdouble3d",
+        NULL
+    },
+    {
+        "double_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "constantdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "constantdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "globaldouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globaldouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "globalconstdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalvolatiledouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalvolatiledouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalconstvolatiledouble4p",
+        NULL
+    },
+    {
+        "double_vector4_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstvolatiledouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "localdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "localconstdouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstdouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localvolatiledouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localvolatiledouble4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localconstvolatiledouble4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstvolatiledouble4restrictp",
+        NULL
+    },
+    {
+        "double_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "double4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "constdouble4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privatedouble4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privateconstdouble4d",
+        NULL
+    },
+    {
+        "double_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "constantdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "constantdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "globaldouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globaldouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "globalconstdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalvolatiledouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalvolatiledouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalconstvolatiledouble8p",
+        NULL
+    },
+    {
+        "double_vector8_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstvolatiledouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "localdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "localconstdouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstdouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localvolatiledouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localvolatiledouble8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localconstvolatiledouble8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstvolatiledouble8restrictp",
+        NULL
+    },
+    {
+        "double_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "double8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "constdouble8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privatedouble8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privateconstdouble8d",
+        NULL
+    },
+    {
+        "double_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "constantdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "constantdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "globaldouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globaldouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "globalconstdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalvolatiledouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalvolatiledouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalconstvolatiledouble16p",
+        NULL
+    },
+    {
+        "double_vector16_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstvolatiledouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "localdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "localconstdouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localconstdouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localvolatiledouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localvolatiledouble16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localconstvolatiledouble16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localconstvolatiledouble16restrictp",
+        NULL
+    },
+    {
+        "double_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "double16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "constdouble16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privatedouble16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privateconstdouble16d",
+        NULL
+    },
+// Support for optional half data type (cl_khr_fp16): OpenCL C sources of empty-bodied kernels whose parameters combine the constant/global/local/private address spaces with const/volatile/restrict for half and half2/3/4/8/16 -- "_p" and "_p2" kernels take pointers, "_d" kernels take by-value arguments.
+static const char * half_kernel_args[] = {
+    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" // NOTE(review): the pragma appears only in this first string; later entries presumably rely on being built in the same program -- confirm against the caller.
+    "\n"
+    "kernel void half_scalar_p(constant half*constanthalfp,\n"
+    "                          constant half *restrict constanthalfrestrictp,\n"
+    "                          global half*globalhalfp,\n"
+    "                          global half *restrict globalhalfrestrictp,\n"
+    "                          global const half* globalconsthalfp,\n"
+    "                          global const half * restrict globalconsthalfrestrictp,\n"
+    "                          global volatile half*globalvolatilehalfp,\n"
+    "                          global volatile half *restrict globalvolatilehalfrestrictp,\n"
+    "                          global const volatile half* globalconstvolatilehalfp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_scalar_p2(global const volatile half * restrict globalconstvolatilehalfrestrictp,\n"
+    "                           local half*localhalfp,\n"
+    "                           local half *restrict localhalfrestrictp,\n"
+    "                           local const half* localconsthalfp,\n"
+    "                           local const half * restrict localconsthalfrestrictp,\n"
+    "                           local volatile half*localvolatilehalfp,\n"
+    "                           local volatile half *restrict localvolatilehalfrestrictp,\n"
+    "                           local const volatile half* localconstvolatilehalfp,\n"
+    "                           local const volatile half * restrict localconstvolatilehalfrestrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_scalar_d(half halfd,\n"
+    "                          const half consthalfd,\n"
+    "                          private half privatehalfd,\n"
+    "                          private const half privateconsthalfd)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector2_p(constant half2*constanthalf2p,\n"
+    "                           constant half2 *restrict constanthalf2restrictp,\n"
+    "                           global half2*globalhalf2p,\n"
+    "                           global half2 *restrict globalhalf2restrictp,\n"
+    "                           global const half2* globalconsthalf2p,\n"
+    "                           global const half2 * restrict globalconsthalf2restrictp,\n"
+    "                           global volatile half2*globalvolatilehalf2p,\n"
+    "                           global volatile half2 *restrict globalvolatilehalf2restrictp,\n"
+    "                           global const volatile half2* globalconstvolatilehalf2p)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector2_p2(global const volatile half2 * restrict globalconstvolatilehalf2restrictp,\n"
+    "                            local half2*localhalf2p,\n"
+    "                            local half2 *restrict localhalf2restrictp,\n"
+    "                            local const half2* localconsthalf2p,\n"
+    "                            local const half2 * restrict localconsthalf2restrictp,\n"
+    "                            local volatile half2*localvolatilehalf2p,\n"
+    "                            local volatile half2 *restrict localvolatilehalf2restrictp,\n"
+    "                            local const volatile half2* localconstvolatilehalf2p,\n"
+    "                            local const volatile half2 * restrict localconstvolatilehalf2restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector2_d(half2 half2d,\n"
+    "                           const half2 consthalf2d,\n"
+    "                           private half2 privatehalf2d,\n"
+    "                           private const half2 privateconsthalf2d)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector3_p(constant half3*constanthalf3p,\n"
+    "                           constant half3 *restrict constanthalf3restrictp,\n"
+    "                           global half3*globalhalf3p,\n"
+    "                           global half3 *restrict globalhalf3restrictp,\n"
+    "                           global const half3* globalconsthalf3p,\n"
+    "                           global const half3 * restrict globalconsthalf3restrictp,\n"
+    "                           global volatile half3*globalvolatilehalf3p,\n"
+    "                           global volatile half3 *restrict globalvolatilehalf3restrictp,\n"
+    "                           global const volatile half3* globalconstvolatilehalf3p)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector3_p2(global const volatile half3 * restrict globalconstvolatilehalf3restrictp,\n"
+    "                            local half3*localhalf3p,\n"
+    "                            local half3 *restrict localhalf3restrictp,\n"
+    "                            local const half3* localconsthalf3p,\n"
+    "                            local const half3 * restrict localconsthalf3restrictp,\n"
+    "                            local volatile half3*localvolatilehalf3p,\n"
+    "                            local volatile half3 *restrict localvolatilehalf3restrictp,\n"
+    "                            local const volatile half3* localconstvolatilehalf3p,\n"
+    "                            local const volatile half3 * restrict localconstvolatilehalf3restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector3_d(half3 half3d,\n"
+    "                           const half3 consthalf3d,\n"
+    "                           private half3 privatehalf3d,\n"
+    "                           private const half3 privateconsthalf3d)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector4_p(constant half4*constanthalf4p,\n"
+    "                           constant half4 *restrict constanthalf4restrictp,\n"
+    "                           global half4*globalhalf4p,\n"
+    "                           global half4 *restrict globalhalf4restrictp,\n"
+    "                           global const half4* globalconsthalf4p,\n"
+    "                           global const half4 * restrict globalconsthalf4restrictp,\n"
+    "                           global volatile half4*globalvolatilehalf4p,\n"
+    "                           global volatile half4 *restrict globalvolatilehalf4restrictp,\n"
+    "                           global const volatile half4* globalconstvolatilehalf4p)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector4_p2(global const volatile half4 * restrict globalconstvolatilehalf4restrictp,\n"
+    "                            local half4*localhalf4p,\n"
+    "                            local half4 *restrict localhalf4restrictp,\n"
+    "                            local const half4* localconsthalf4p,\n"
+    "                            local const half4 * restrict localconsthalf4restrictp,\n"
+    "                            local volatile half4*localvolatilehalf4p,\n"
+    "                            local volatile half4 *restrict localvolatilehalf4restrictp,\n"
+    "                            local const volatile half4* localconstvolatilehalf4p,\n"
+    "                            local const volatile half4 * restrict localconstvolatilehalf4restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector4_d(half4 half4d,\n"
+    "                           const half4 consthalf4d,\n"
+    "                           private half4 privatehalf4d,\n"
+    "                           private const half4 privateconsthalf4d)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector8_p(constant half8*constanthalf8p,\n"
+    "                           constant half8 *restrict constanthalf8restrictp,\n"
+    "                           global half8*globalhalf8p,\n"
+    "                           global half8 *restrict globalhalf8restrictp,\n"
+    "                           global const half8* globalconsthalf8p,\n"
+    "                           global const half8 * restrict globalconsthalf8restrictp,\n"
+    "                           global volatile half8*globalvolatilehalf8p,\n"
+    "                           global volatile half8 *restrict globalvolatilehalf8restrictp,\n"
+    "                           global const volatile half8* globalconstvolatilehalf8p)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector8_p2(global const volatile half8 * restrict globalconstvolatilehalf8restrictp,\n"
+    "                            local half8*localhalf8p,\n"
+    "                            local half8 *restrict localhalf8restrictp,\n"
+    "                            local const half8* localconsthalf8p,\n"
+    "                            local const half8 * restrict localconsthalf8restrictp,\n"
+    "                            local volatile half8*localvolatilehalf8p,\n"
+    "                            local volatile half8 *restrict localvolatilehalf8restrictp,\n"
+    "                            local const volatile half8* localconstvolatilehalf8p,\n"
+    "                            local const volatile half8 * restrict localconstvolatilehalf8restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector8_d(half8 half8d,\n"
+    "                           const half8 consthalf8d,\n"
+    "                           private half8 privatehalf8d,\n"
+    "                           private const half8 privateconsthalf8d)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector16_p(constant half16*constanthalf16p,\n"
+    "                            constant half16 *restrict constanthalf16restrictp,\n"
+    "                            global half16*globalhalf16p,\n"
+    "                            global half16 *restrict globalhalf16restrictp,\n"
+    "                            global const half16* globalconsthalf16p,\n"
+    "                            global const half16 * restrict globalconsthalf16restrictp,\n"
+    "                            global volatile half16*globalvolatilehalf16p,\n"
+    "                            global volatile half16 *restrict globalvolatilehalf16restrictp,\n"
+    "                            global const volatile half16* globalconstvolatilehalf16p)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector16_p2(global const volatile half16 * restrict globalconstvolatilehalf16restrictp,\n"
+    "                             local half16*localhalf16p,\n"
+    "                             local half16 *restrict localhalf16restrictp,\n"
+    "                             local const half16* localconsthalf16p,\n"
+    "                             local const half16 * restrict localconsthalf16restrictp,\n"
+    "                             local volatile half16*localvolatilehalf16p,\n"
+    "                             local volatile half16 *restrict localvolatilehalf16restrictp,\n"
+    "                             local const volatile half16* localconstvolatilehalf16p,\n"
+    "                             local const volatile half16 * restrict localconstvolatilehalf16restrictp)\n"
+    "{}\n",
+    "\n"
+    "kernel void half_vector16_d(half16 half16d,\n"
+    "                            const half16 consthalf16d,\n"
+    "                            private half16 privatehalf16d,\n"
+    "                            private const half16 privateconsthalf16d)\n"
+    "{}\n",
+    "\n"
+static const char * half_arg_info[][77] = {
+    {
+        "half_scalar_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "constanthalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "constanthalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "globalhalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalhalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "globalconsthalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconsthalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalvolatilehalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalvolatilehalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalconstvolatilehalfp",
+        NULL
+    },
+    {
+        "half_scalar_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconstvolatilehalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "localhalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localhalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "localconsthalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localconsthalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localvolatilehalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localvolatilehalfrestrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localconstvolatilehalfp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localconstvolatilehalfrestrictp",
+        NULL
+    },
+    {
+        "half_scalar_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "halfd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "consthalfd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "privatehalfd",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "privateconsthalfd",
+        NULL
+    },
+    {
+        "half_vector2_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "constanthalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "constanthalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "globalhalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalhalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "globalconsthalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconsthalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalvolatilehalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalvolatilehalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalconstvolatilehalf2p",
+        NULL
+    },
+    {
+        "half_vector2_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconstvolatilehalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "localhalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localhalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "localconsthalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconsthalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localvolatilehalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localvolatilehalf2restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localconstvolatilehalf2p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconstvolatilehalf2restrictp",
+        NULL
+    },
+    {
+        "half_vector2_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "half2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "consthalf2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "privatehalf2d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "privateconsthalf2d",
+        NULL
+    },
+    {
+        "half_vector3_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "constanthalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "constanthalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "globalhalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalhalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "globalconsthalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalconsthalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalvolatilehalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalvolatilehalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalconstvolatilehalf3p",
+        NULL
+    },
+    {
+        "half_vector3_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalconstvolatilehalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "localhalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localhalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "localconsthalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconsthalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localvolatilehalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localvolatilehalf3restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localconstvolatilehalf3p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconstvolatilehalf3restrictp",
+        NULL
+    },
+    {
+        "half_vector3_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", "half3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", "consthalf3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", "privatehalf3d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", "privateconsthalf3d",
+        NULL
+    },
+    {
+        "half_vector4_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "constanthalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "constanthalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "globalhalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalhalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "globalconsthalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconsthalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "globalvolatilehalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalvolatilehalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "globalconstvolatilehalf4p",
+        NULL
+    },
+    {
+        "half_vector4_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconstvolatilehalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "localhalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localhalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "localconsthalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconsthalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localvolatilehalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localvolatilehalf4restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localconstvolatilehalf4p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconstvolatilehalf4restrictp",
+        NULL
+    },
+    {
+        "half_vector4_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4", "half4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4", "consthalf4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4", "privatehalf4d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4", "privateconsthalf4d",
+        NULL
+    },
+    {
+        "half_vector8_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "constanthalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "constanthalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "globalhalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalhalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "globalconsthalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconsthalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalvolatilehalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalvolatilehalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalconstvolatilehalf8p",
+        NULL
+    },
+    {
+        "half_vector8_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconstvolatilehalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "localhalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localhalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "localconsthalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconsthalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "localvolatilehalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localvolatilehalf8restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "localconstvolatilehalf8p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconstvolatilehalf8restrictp",
+        NULL
+    },
+    {
+        "half_vector8_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8", "half8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8", "consthalf8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8", "privatehalf8d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8", "privateconsthalf8d",
+        NULL
+    },
+    {
+        "half_vector16_p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "constanthalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "constanthalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "globalhalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalhalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "globalconsthalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconsthalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "globalvolatilehalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalvolatilehalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "globalconstvolatilehalf16p",
+        NULL
+    },
+    {
+        "half_vector16_p2",
+        (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconstvolatilehalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "localhalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localhalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "localconsthalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconsthalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localvolatilehalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localvolatilehalf16restrictp",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localconstvolatilehalf16p",
+        (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconstvolatilehalf16restrictp",
+        NULL
+    },
+    {
+        "half_vector16_d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "half16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "consthalf16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "privatehalf16d",
+        (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "privateconsthalf16d",
+        NULL
+    },
+// Returns true when `name` appears in `list` as a complete ';'-delimited
+// entry, not merely as a substring of a longer kernel name (for example
+// "half_vector4_p" inside "half_vector4_p2").
+static bool kernel_names_list_has_entry( const char *list, const char *name )
+{
+    const size_t name_len = strlen( name );
+    if ( name_len == 0 )
+        return false;
+    for ( const char *hit = strstr( list, name ); hit != NULL; hit = strstr( hit + 1, name ) )
+    {
+        const bool starts_entry = ( hit == list ) || ( hit[ -1 ] == ';' );
+        const char following = hit[ name_len ];
+        if ( starts_entry && ( following == ';' || following == '\0' ) )
+            return true;
+    }
+    return false;
+}
+// Builds `kernel_args` (an array of `lines_count` source strings) with
+// "-cl-kernel-arg-info" and checks the program and kernel queries against the
+// expectations stored in `arg_info`.
+//
+// Layout of `arg_info`, as read by this function: arg_info[k][0] is the name
+// of kernel k; it is followed by one group of ARG_INFO_FIELD_COUNT entries per
+// kernel argument (address qualifier, access qualifier and type qualifier
+// stored as integers cast to pointers, then the type name and argument name as
+// strings). A NULL entry at the start of a group ends the list.
+//
+// `total_kernels_in_program` is the number of kernels the program must report.
+//
+// Returns 0 when every check passes and a non-zero value otherwise: -1 for a
+// mismatch detected here, or the OpenCL error code of a failed query.
+// NOTE(review): test_error is a harness macro defined outside this view; it is
+// presumed to log and return the error code from this function - confirm.
+template<typename arg_info_t>
+int test(cl_device_id deviceID, cl_context context, kernel_args_t kernel_args, cl_uint lines_count, arg_info_t arg_info, size_t total_kernels_in_program) {
+    const size_t max_name_len = 512;
+    cl_char name[ max_name_len ];
+    cl_uint arg_count, numArgs;
+    size_t i, j, size;
+    int error;
+    clProgramWrapper program =
+    clCreateProgramWithSource(context, lines_count, kernel_args, NULL, &error);
+    if ( program == NULL || error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to create required arguments kernel program" );
+        return -1;
+    }
+    // Compile the program. The return value is not inspected; the build
+    // status queried right below decides whether the test can continue.
+    log_info( "Building kernels...\n" );
+    clBuildProgram( program, 1, &deviceID, "-cl-kernel-arg-info", NULL, NULL );
+    // check for build errors and exit if things didn't work
+    size_t size_ret;
+    cl_build_status build_status;
+    error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof(build_status), &build_status, &size_ret);
+    test_error( error, "Unable to query build status" );
+    if (build_status == CL_BUILD_ERROR) {
+        printf("CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status);
+        error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret);
+        test_error( error, "Unable to get build log size" );
+        // One extra byte so the log is terminated even if the runtime does
+        // not write a terminator itself.
+        char *build_log = (char *)malloc(size_ret + 1);
+        if ( build_log == NULL )
+        {
+            log_error( "ERROR: Unable to allocate %d bytes for the build log\n", (int)size_ret );
+            return -1;
+        }
+        build_log[ size_ret ] = '\0';
+        error = clGetProgramBuildInfo(program,deviceID, CL_PROGRAM_BUILD_LOG, size_ret, build_log, &size_ret);
+        if ( error != CL_SUCCESS )
+        {
+            // Handled explicitly so the buffer is released before leaving.
+            print_error( error, "Unable to get build log" );
+            free(build_log);
+            return error;
+        }
+        printf("CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
+        printf("CL_BUILD_ERROR. exiting\n");
+        free(build_log);
+        return -1;
+    }
+    // Lookup the number of kernels in the program.
+    log_info( "Testing kernels...\n" );
+    size_t total_kernels = 0;
+    error = clGetProgramInfo( program, CL_PROGRAM_NUM_KERNELS, sizeof( size_t ), &total_kernels, NULL );
+    test_error( error, "Unable to get program info num kernels" );
+    if ( total_kernels != total_kernels_in_program )
+    {
+        // A count mismatch is not an OpenCL error code, so log it directly.
+        log_error( "ERROR: Program did not build all kernels (expected %d, got %d)\n", (int) total_kernels_in_program, (int) total_kernels );
+        return -1;
+    }
+    // Lookup the kernel names.
+    size_t kernel_names_len = 0;
+    error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, 0, NULL, &kernel_names_len );
+    test_error( error, "Unable to get length of kernel names list." );
+    // Every name contributes its length plus one character: a ';' separator,
+    // or the terminator for the last name.
+    size_t expected_kernel_names_len = 0;
+    for ( i = 0; i < total_kernels; ++i )
+    {
+        expected_kernel_names_len += 1 + strlen( arg_info[ i ][ 0 ] );
+    }
+    if ( kernel_names_len != expected_kernel_names_len )
+    {
+        log_error( "Kernel names string is not the right length, expected %d, got %d\n", (int) expected_kernel_names_len, (int) kernel_names_len );
+        return -1;
+    }
+    const size_t len = ( kernel_names_len + 1 ) * sizeof( char );
+    char* kernel_names = (char*) malloc( len );
+    if ( kernel_names == NULL )
+    {
+        log_error( "ERROR: Unable to allocate %d bytes for the kernel names list\n", (int) len );
+        return -1;
+    }
+    error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, len, kernel_names, &kernel_names_len );
+    if ( error != CL_SUCCESS )
+    {
+        // Handled explicitly so the buffer is released before leaving.
+        print_error( error, "Unable to get kernel names list." );
+        free( kernel_names );
+        return error;
+    }
+    // Check to see if the kernel name array is null terminated. The returned
+    // size is validated first so the index below stays inside the buffer.
+    if ( kernel_names_len == 0 || kernel_names_len > len || kernel_names[ kernel_names_len - 1 ] != '\0' )
+    {
+        free( kernel_names );
+        log_error( "ERROR: Kernel name list was not null terminated\n" );
+        return -1;
+    }
+    // Check to see if the correct kernel name string was returned.
+    // Does the string contain each expected kernel name as a full entry?
+    for ( i = 0; i < total_kernels; ++i )
+        if ( !kernel_names_list_has_entry( kernel_names, arg_info[ i ][ 0 ] ) )
+            break;
+    if ( i != total_kernels )
+    {
+        log_error( "Kernel names string is missing \"%s\"\n", arg_info[ i ][ 0 ] );
+        free( kernel_names );
+        return -1;
+    }
+    // Are the kernel names delimited by ';'? strtok modifies the buffer, so
+    // this has to be the last check that reads kernel_names.
+    bool names_delimited = ( strtok( kernel_names, ";" ) != NULL );
+    if ( names_delimited )
+    {
+        for ( i = 1; i < total_kernels; ++i )
+        {
+            if ( !strtok( NULL, ";" ) )
+            {
+                names_delimited = false;
+            }
+        }
+    }
+    free( kernel_names );
+    if ( !names_delimited )
+    {
+        log_error( "Kernel names string was not properly delimited by ';'\n" );
+        return -1;
+    }
+    // Create kernel objects and query them. A failing kernel is recorded in
+    // rc and the remaining kernels are still examined.
+    int rc = 0;
+    for ( i = 0; i < total_kernels; ++i )
+    {
+        int kernel_rc = 0;
+        const char* kernel_name = arg_info[ i ][ 0 ];
+        clKernelWrapper kernel = clCreateKernel(program, kernel_name, &error);
+        if( kernel == NULL || error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Could not get kernel: %s\n", kernel_name );
+            kernel_rc = -1;
+        }
+        if(kernel_rc == 0)
+        {
+            // Determine the expected number of arguments by walking the
+            // groups until the NULL entry that ends the list.
+            arg_count = 0;
+            while (arg_info[ i ][ (ARG_INFO_FIELD_COUNT * arg_count) + 1 ] != NULL)
+                ++arg_count;
+            // Try to get the number of arguments.
+            error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &size );
+            test_error( error, "Unable to get kernel arg count param size" );
+            if( size != sizeof( numArgs ) )
+            {
+                log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d) for kernel: %s\n", (int)sizeof( numArgs ), (int)size, kernel_name );
+                kernel_rc = -1;
+            }
+        }
+        if(kernel_rc == 0)
+        {
+            error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL );
+            test_error( error, "Unable to get kernel arg count" );
+            if( numArgs != arg_count )
+            {
+                log_error( "ERROR: Kernel arg count returned invalid value (expected %u, got %u) for kernel: %s\n", (unsigned int)arg_count, (unsigned int)numArgs, kernel_name );
+                kernel_rc = -1;
+            }
+        }
+        if(kernel_rc == 0)
+        {
+            for ( j = 0; j < numArgs; ++j )
+            {
+                int arg_rc = 0;
+                // The three qualifiers are stored in the table as integers
+                // cast to pointers; convert them back through uintptr_t.
+                cl_kernel_arg_address_qualifier expected_address_qualifier = (cl_kernel_arg_address_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ADDR_OFFSET ];
+                cl_kernel_arg_access_qualifier expected_access_qualifier =  (cl_kernel_arg_access_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ACCESS_OFFSET ];
+                cl_kernel_arg_type_qualifier expected_type_qualifier = (cl_kernel_arg_type_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_QUAL_OFFSET ];
+                const char* expected_type_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_NAME_OFFSET ];
+                const char* expected_arg_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ARG_NAME_OFFSET ];
+                // Try to get the address qualifier of each argument.
+                cl_kernel_arg_address_qualifier address_qualifier = 0;
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof address_qualifier, &address_qualifier, &size );
+                test_error( error, "Unable to get argument address qualifier" );
+                if ( address_qualifier != expected_address_qualifier )
+                {
+                    log_error( "ERROR: Bad address qualifier, kernel: \"%s\", argument number: %u, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_address_qualifier, (unsigned int)address_qualifier );
+                    arg_rc = -1;
+                }
+                // Try to get the access qualifier of each argument.
+                cl_kernel_arg_access_qualifier access_qualifier = 0;
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ACCESS_QUALIFIER, sizeof access_qualifier, &access_qualifier, &size );
+                test_error( error, "Unable to get argument access qualifier" );
+                if ( access_qualifier != expected_access_qualifier )
+                {
+                    log_error( "ERROR: Bad access qualifier, kernel: \"%s\", argument number: %u, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_access_qualifier, (unsigned int)access_qualifier );
+                    arg_rc = -1;
+                }
+                // Try to get the type qualifier of each argument.
+                cl_kernel_arg_type_qualifier arg_type_qualifier = 0;
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof arg_type_qualifier, &arg_type_qualifier, &size );
+                test_error( error, "Unable to get argument type qualifier" );
+                if ( arg_type_qualifier != expected_type_qualifier )
+                {
+                    log_error( "ERROR: Bad type qualifier, kernel: \"%s\", argument number: %u, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_type_qualifier, (unsigned int)arg_type_qualifier );
+                    arg_rc = -1;
+                }
+                // Try to get the type of each argument. The buffer is cleared
+                // first so the comparison never sees stale characters.
+                memset( name, 0, max_name_len );
+                error = clGetKernelArgInfo(kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_NAME, max_name_len, name, &size );
+                test_error( error, "Unable to get argument type name" );
+                if ( strcmp( (const char*) name, expected_type_name ) != 0 )
+                {
+                    log_error( "ERROR: Bad argument type name, kernel: \"%s\", argument number: %u, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_type_name, (const char*) name );
+                    arg_rc = -1;
+                }
+                // Try to get the name of each argument.
+                memset( name, 0, max_name_len );
+                error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_NAME, max_name_len, name, &size );
+                test_error( error, "Unable to get argument name" );
+                if ( strcmp( (const char*) name, expected_arg_name ) != 0 )
+                {
+                    log_error( "ERROR: Bad argument name, kernel: \"%s\", argument number: %u, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_arg_name, (const char*) name );
+                    arg_rc = -1;
+                }
+                if(arg_rc != 0) {
+                    kernel_rc = -1;
+                }
+            }
+        }
+        if(kernel_rc != 0) {
+            rc = -1;
+        }
+    }
+    return rc;
+}
+// Runs the kernel-argument-info checks for the required argument types and,
+// when the device supports them, for image, double and half argument types.
+//
+// Image support is read from CL_DEVICE_IMAGE_SUPPORT. Double support is taken
+// from the cl_khr_fp64 extension or, failing that, from a non-zero
+// CL_DEVICE_DOUBLE_FP_CONFIG. Half support is taken from cl_khr_fp16.
+//
+// `queue` and `num_elements` are not used by this function.
+//
+// Returns 0 when every executed group passes and -1 when any group fails.
+// NOTE(review): test_error is a harness macro defined outside this view; it is
+// presumed to return the OpenCL error code early when a device query fails.
+int test_get_kernel_arg_info_compatibility( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    int error;
+    cl_bool supports_double = 0; // assume not
+    cl_bool supports_half = 0; // assume not
+    cl_bool supports_images = 0; // assume not
+    // Check if this device supports images
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof supports_images, &supports_images, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed");
+    if (supports_images) {
+        log_info(" o Device supports images\n");
+        log_info(" o Expecting SUCCESS when testing image kernel arguments.\n");
+    }
+    else {
+        log_info(" o Device lacks image support\n");
+        log_info(" o Not testing image kernel arguments.\n");
+    }
+    // Check if this device supports double precision
+    if (is_extension_available(deviceID, "cl_khr_fp64")) {
+        log_info(" o Device claims extension 'cl_khr_fp64'\n");
+        log_info(" o Expecting SUCCESS when testing double kernel arguments.\n");
+        supports_double = 1;
+    } else {
+        // Without the extension, a non-zero double FP config still counts as
+        // double support.
+        cl_device_fp_config double_fp_config;
+        error = clGetDeviceInfo(deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(double_fp_config), &double_fp_config, NULL);
+        test_error(error, "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed");
+        if (double_fp_config != 0) {
+            log_info(" o Device reports double support through CL_DEVICE_DOUBLE_FP_CONFIG\n");
+            log_info(" o Expecting SUCCESS when testing double kernel arguments.\n");
+            supports_double = 1;
+        } else {
+            log_info(" o Device lacks extension 'cl_khr_fp64'\n");
+            log_info(" o Not testing double kernel arguments.\n");
+        }
+    }
+    // Check if this device supports half precision
+    if (is_extension_available(deviceID, "cl_khr_fp16")) {
+        log_info(" o Device claims extension 'cl_khr_fp16'\n");
+        log_info(" o Expecting SUCCESS when testing halfn* kernel arguments.\n");
+        supports_half = 1;
+    } else {
+        log_info(" o Device lacks extension 'cl_khr_fp16'\n");
+        log_info(" o Not testing halfn* kernel arguments.\n");
+    }
+    // A failing group is recorded here and the remaining groups still run.
+    int test_failed = 0;
+    // Now create a test program using required arguments
+    log_info("Testing required kernel arguments...\n");
+    error = test(deviceID, context, required_kernel_args, sizeof(required_kernel_args)/sizeof(required_kernel_args[0]), required_arg_info, sizeof(required_arg_info)/sizeof(required_arg_info[0]));
+    test_failed = (error) ? -1 : test_failed;
+    if ( supports_images ) {
+        log_info("Testing optional image arguments...\n");
+        error = test(deviceID, context, image_kernel_args, sizeof(image_kernel_args)/sizeof(image_kernel_args[0]), image_arg_info, sizeof(image_arg_info)/sizeof(image_arg_info[0]));
+        test_failed = (error) ? -1 : test_failed;
+    }
+    if ( supports_double ) {
+        log_info("Testing optional double arguments...\n");
+        error = test(deviceID, context, double_kernel_args, sizeof(double_kernel_args)/sizeof(double_kernel_args[0]), double_arg_info, sizeof(double_arg_info)/sizeof(double_arg_info[0]));
+        test_failed = (error) ? -1 : test_failed;
+    }
+    if ( supports_half ) {
+        log_info("Testing optional half arguments...\n");
+        error = test(deviceID, context, half_kernel_args, sizeof(half_kernel_args)/sizeof(half_kernel_args[0]), half_arg_info, sizeof(half_arg_info)/sizeof(half_arg_info[0]));
+        test_failed = (error) ? -1 : test_failed;
+    }
+    return test_failed;
+}
diff --git a/test_conformance/api/test_kernel_arg_multi_setup.cpp b/test_conformance/api/test_kernel_arg_multi_setup.cpp
index de3dc15..92c039e 100644
--- a/test_conformance/api/test_kernel_arg_multi_setup.cpp
+++ b/test_conformance/api/test_kernel_arg_multi_setup.cpp
@@ -66,39 +66,24 @@
     // Create input streams
     initData[ 0 ] = create_random_data( vec1Type, d, (unsigned int)threads[ 0 ] * vec1Size );
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       get_explicit_type_size(vec1Type) * threads[0] * vec1Size,
-                       initData[0], &error);
+    streams[ 0 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, initData[ 0 ], &error );
     test_error( error, "Unable to create testing stream" );
     initData[ 1 ] = create_random_data( vec2Type, d, (unsigned int)threads[ 0 ] * vec2Size );
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       get_explicit_type_size(vec2Type) * threads[0] * vec2Size,
-                       initData[1], &error);
+    streams[ 1 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, initData[ 1 ], &error );
     test_error( error, "Unable to create testing stream" );
     initData[ 2 ] = create_random_data( vec3Type, d, (unsigned int)threads[ 0 ] * vec3Size );
-    streams[2] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       get_explicit_type_size(vec3Type) * threads[0] * vec3Size,
-                       initData[2], &error);
+    streams[ 2 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, initData[ 2 ], &error );
     test_error( error, "Unable to create testing stream" );
-    streams[3] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        get_explicit_type_size(vec1Type) * threads[0] * vec1Size, NULL, &error);
+    streams[ 3 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, NULL, &error );
     test_error( error, "Unable to create testing stream" );
-    streams[4] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        get_explicit_type_size(vec2Type) * threads[0] * vec2Size, NULL, &error);
+    streams[ 4 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, NULL, &error );
     test_error( error, "Unable to create testing stream" );
-    streams[5] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        get_explicit_type_size(vec3Type) * threads[0] * vec3Size, NULL, &error);
+    streams[ 5 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, NULL, &error );
     test_error( error, "Unable to create testing stream" );
     // Set the arguments
diff --git a/test_conformance/api/test_kernel_attributes.cpp b/test_conformance/api/test_kernel_attributes.cpp
deleted file mode 100644
index 2e4e0a7..0000000
--- a/test_conformance/api/test_kernel_attributes.cpp
+++ /dev/null
@@ -1,339 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include <iostream>
-#include <vector>
-#include <string>
-#include <algorithm>
-#include "procs.h"
-#include "harness/errorHelpers.h"
-#include "harness/typeWrappers.h"
-#include "harness/parseParameters.h"
-using KernelAttributes = std::vector<std::string>;
-static std::string generate_kernel_source(const KernelAttributes& attributes)
-    std::string kernel;
-    for (auto attribute : attributes)
-    {
-        kernel += "__attribute__((" + attribute + "))\n";
-    }
-    kernel += "__kernel void test_kernel(){}";
-    return kernel;
-using AttributePermutations = std::vector<KernelAttributes>;
-// The following combinations have been chosen as they place each of the
-// attribute types in the different orders that they can occur. While distinct
-// permutations would provide a complete overview of the API the sheer number of
-// combinations increases the runtime of this test by an unreasonable amount
-AttributePermutations vect_tests;
-AttributePermutations work_tests;
-AttributePermutations reqd_tests;
-AttributePermutations vect_reqd_tests;
-AttributePermutations work_vect_tests;
-AttributePermutations reqd_work_tests;
-AttributePermutations vect_work_reqd_tests;
-AttributePermutations work_reqd_vect_tests;
-AttributePermutations reqd_vect_work_tests;
-// Generate a vector with vec_type_hint(<data_type>) so that it can be used to
-// generate different kernels
-static KernelAttributes generate_vec_type_hint_data(cl_device_id deviceID)
-    KernelAttributes vec_type_hint_data;
-    // TODO Test for signed vectors (char/short/int/etc)
-    std::vector<std::string> vector_types = { "uchar", "ushort", "uint",
-                                              "float" };
-    if (gHasLong)
-    {
-        vector_types.push_back("ulong");
-    }
-    if (device_supports_half(deviceID))
-    {
-        vector_types.push_back("half");
-    }
-    if (device_supports_double(deviceID))
-    {
-        vector_types.push_back("double");
-    }
-    const auto vector_sizes = { "2", "3", "4", "8", "16" };
-    for (auto type : vector_types)
-    {
-        for (auto size : vector_sizes)
-        {
-            vec_type_hint_data.push_back("vec_type_hint(" + type + size + ")");
-        }
-    }
-    return vec_type_hint_data;
-struct WorkGroupDimensions
-    int x;
-    int y;
-    int z;
-// Generate vectors to store reqd_work_group_size(<dimensions>) and
-// work_group_size_hint(<dimensions>) so that they can be used to generate
-// different kernels
-static KernelAttributes generate_reqd_work_group_size_data(
-    const std::vector<WorkGroupDimensions>& work_group_dimensions)
-    KernelAttributes reqd_work_group_size_data;
-    for (auto dimension : work_group_dimensions)
-    {
-        reqd_work_group_size_data.push_back(
-            "reqd_work_group_size(" + std::to_string(dimension.x) + ","
-            + std::to_string(dimension.y) + "," + std::to_string(dimension.z)
-            + ")");
-    }
-    return reqd_work_group_size_data;
-static KernelAttributes generate_work_group_size_data(
-    const std::vector<WorkGroupDimensions>& work_group_dimensions)
-    KernelAttributes work_group_size_hint_data;
-    for (auto dimension : work_group_dimensions)
-    {
-        work_group_size_hint_data.push_back(
-            "work_group_size_hint(" + std::to_string(dimension.x) + ","
-            + std::to_string(dimension.y) + "," + std::to_string(dimension.z)
-            + ")");
-    }
-    return work_group_size_hint_data;
-// Populate the Global Vectors which store individual Kernel Attributes
-static void populate_single_attribute_tests(
-    // Vectors to store the different data that fill the attributes
-    const KernelAttributes& vec_type_hint_data,
-    const KernelAttributes& work_group_size_hint_data,
-    const KernelAttributes& reqd_work_group_size_data)
-    for (auto vector_test : vec_type_hint_data)
-    {
-        // Initialise vec_type_hint attribute tests
-        vect_tests.push_back({ vector_test });
-    }
-    for (auto work_group_test : work_group_size_hint_data)
-    {
-        // Initialise work_group_size_hint attribute test
-        work_tests.push_back({ work_group_test });
-    }
-    for (auto reqd_work_group_test : reqd_work_group_size_data)
-    {
-        // Initialise reqd_work_group_size attribute tests
-        reqd_tests.push_back({ reqd_work_group_test });
-    }
-// Populate the Global Vectors which store the different permutations of 2
-// Kernel Attributes
-static void populate_double_attribute_tests(
-    const KernelAttributes& vec_type_hint_data,
-    const KernelAttributes& work_group_size_hint_data,
-    const KernelAttributes& reqd_work_group_size_data)
-    for (auto vector_test : vec_type_hint_data)
-    {
-        for (auto work_group_test : work_group_size_hint_data)
-        {
-            // Initialise the tests for the permutation of work_group_size_hint
-            // combined with vec_type_hint
-            work_vect_tests.push_back({ work_group_test, vector_test });
-        }
-        for (auto reqd_work_group_test : reqd_work_group_size_data)
-        {
-            // Initialise the tests for the permutation of vec_type_hint and
-            // reqd_work_group_size
-            vect_reqd_tests.push_back({ vector_test, reqd_work_group_test });
-        }
-    }
-    for (auto work_group_test : work_group_size_hint_data)
-    {
-        for (auto reqd_work_group_test : reqd_work_group_size_data)
-        {
-            // Initialse the tests for the permutation of reqd_work_group_size
-            // and  work_group_size_hint
-            reqd_work_tests.push_back(
-                { reqd_work_group_test, work_group_test });
-        }
-    }
-// Populate the Global Vectors which store the different permutations of 3
-// Kernel Attributes
-static void populate_triple_attribute_tests(
-    const KernelAttributes& vec_type_hint_data,
-    const KernelAttributes& work_group_size_hint_data,
-    const KernelAttributes& reqd_work_group_size_data)
-    for (auto vector_test : vec_type_hint_data)
-    {
-        for (auto work_group_test : work_group_size_hint_data)
-        {
-            for (auto reqd_work_group_test : reqd_work_group_size_data)
-            {
-                //  Initialise the chosen permutations of 3 attributes
-                vect_work_reqd_tests.push_back(
-                    { vector_test, work_group_test, reqd_work_group_test });
-                work_reqd_vect_tests.push_back(
-                    { work_group_test, reqd_work_group_test, vector_test });
-                reqd_vect_work_tests.push_back(
-                    { reqd_work_group_test, vector_test, work_group_test });
-            }
-        }
-    }
-static const std::vector<AttributePermutations*>
-generate_attribute_tests(const KernelAttributes& vec_type_hint_data,
-                         const KernelAttributes& work_group_size_hint_data,
-                         const KernelAttributes& reqd_work_group_size_data)
-    populate_single_attribute_tests(vec_type_hint_data,
-                                    work_group_size_hint_data,
-                                    reqd_work_group_size_data);
-    populate_double_attribute_tests(vec_type_hint_data,
-                                    work_group_size_hint_data,
-                                    reqd_work_group_size_data);
-    populate_triple_attribute_tests(vec_type_hint_data,
-                                    work_group_size_hint_data,
-                                    reqd_work_group_size_data);
-    // Store all of the filled vectors in a single structure
-    const std::vector<AttributePermutations*> all_tests = {
-        &vect_tests,           &work_tests,           &reqd_tests,
-        &work_vect_tests,      &vect_reqd_tests,      &reqd_work_tests,
-        &vect_work_reqd_tests, &work_reqd_vect_tests, &reqd_vect_work_tests
-    };
-    return all_tests;
-static const std::vector<AttributePermutations*>
-initialise_attribute_data(cl_device_id deviceID)
-    // This vector stores different work group dimensions that can be used by
-    // the reqd_work_group_size and work_group_size_hint attributes. It
-    // currently only has a single value to minimise time complexity of the
-    // overall test but can be easily changed.
-    static const std::vector<WorkGroupDimensions> work_group_dimensions = {
-        { 1, 1, 1 }
-    };
-    KernelAttributes vec_type_hint_data = generate_vec_type_hint_data(deviceID);
-    KernelAttributes work_group_size_hint_data =
-        generate_work_group_size_data(work_group_dimensions);
-    KernelAttributes reqd_work_group_size_data =
-        generate_reqd_work_group_size_data(work_group_dimensions);
-    // Generate all the permutations of attributes to create different test
-    // suites
-    return generate_attribute_tests(vec_type_hint_data,
-                                    work_group_size_hint_data,
-                                    reqd_work_group_size_data);
-static bool run_test(cl_context context, cl_device_id deviceID,
-                     const AttributePermutations& permutations)
-    bool success = true;
-    for (auto attribute_permutation : permutations)
-    {
-        std::string kernel_source_string =
-            generate_kernel_source(attribute_permutation);
-        const char* kernel_src = kernel_source_string.c_str();
-        clProgramWrapper program;
-        clKernelWrapper kernel;
-        cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                                 &kernel_src, "test_kernel");
-        test_error(err, "create_single_kernel_helper");
-        // Get the size of the kernel attribute string returned
-        size_t size = 0;
-        err = clGetKernelInfo(kernel, CL_KERNEL_ATTRIBUTES, 0, nullptr, &size);
-        test_error(err, "clGetKernelInfo");
-        std::vector<char> attributes(size);
-        err = clGetKernelInfo(kernel, CL_KERNEL_ATTRIBUTES, attributes.size(),
-                    , nullptr);
-        test_error(err, "clGetKernelInfo");
-        std::string attribute_string(;
-        attribute_string.erase(
-            std::remove(attribute_string.begin(), attribute_string.end(), ' '),
-            attribute_string.end());
-        if (gCompilationMode != kOnline)
-        {
-            if (!attribute_string.empty())
-            {
-                success = false;
-                log_error("Error: Expected an empty string\n");
-                log_error("Attribute string reported as: %s\n",
-                          attribute_string.c_str());
-            }
-        }
-        else
-        {
-            bool permutation_success = true;
-            for (auto attribute : attribute_permutation)
-            {
-                if (attribute_string.find(attribute) == std::string::npos)
-                {
-                    success = false;
-                    permutation_success = false;
-                    log_error("ERROR: did not find expected attribute: '%s'\n",
-                              attribute.c_str());
-                }
-            }
-            if (!permutation_success)
-            {
-                log_error("Attribute string reported as: %s\n",
-                          attribute_string.c_str());
-            }
-        }
-    }
-    return success;
-int test_kernel_attributes(cl_device_id deviceID, cl_context context,
-                           cl_command_queue queue, int num_elements)
-    bool success = true;
-    // Vector to store all of the tests
-    const std::vector<AttributePermutations*> all_tests =
-        initialise_attribute_data(deviceID);
-    for (auto permutations : all_tests)
-    {
-        success = success && run_test(context, deviceID, *permutations);
-    }
-    return success ? TEST_PASS : TEST_FAIL;
diff --git a/test_conformance/api/test_kernel_private_memory_size.cpp b/test_conformance/api/test_kernel_private_memory_size.cpp
deleted file mode 100644
index a789b4d..0000000
--- a/test_conformance/api/test_kernel_private_memory_size.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "harness/errorHelpers.h"
-#include "harness/typeWrappers.h"
-#include <iostream>
-#include "procs.h"
-int test_kernel_private_memory_size(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements)
-    const char* TEST_KERNEL =
-        R"(__kernel void private_memory( __global uint *buffer ){
-         volatile __private uint x[1];
-         buffer[0] = x[0];
-         })";
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                             &TEST_KERNEL, "private_memory");
-    test_error(err, "create_single_kernel_helper");
-    cl_ulong size = CL_ULONG_MAX;
-    err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_PRIVATE_MEM_SIZE,
-                                   sizeof(cl_ulong), &size, nullptr);
-    test_error(err, "clGetKernelWorkGroupInfo");
-    return TEST_PASS;
diff --git a/test_conformance/api/test_kernels.cpp b/test_conformance/api/test_kernels.cpp
index d25410b..993a72f 100644
--- a/test_conformance/api/test_kernels.cpp
+++ b/test_conformance/api/test_kernels.cpp
@@ -192,10 +192,10 @@
     /* Create some I/O streams */
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_float) * num_elements, NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_int) * num_elements, NULL, &error);
     test_error( error, "Creating test array failed" );
@@ -323,10 +323,10 @@
     /* Create some I/O streams */
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_float) * num_elements, NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_int) * num_elements, NULL, &error);
     test_error( error, "Creating test array failed" );
@@ -412,15 +412,15 @@
     free_mtdata(d); d = NULL;
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
                                 sizeof(cl_int) * num_elements,
                       , &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
                                 sizeof(cl_int) * num_elements,
                       , &error);
     test_error( error, "Creating test array failed" );
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_int) * num_elements, NULL, &error);
     test_error( error, "Creating test array failed" );
@@ -496,11 +496,11 @@
     free_mtdata(d); d = NULL;
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
                                 sizeof(image_pair_t) * num_elements,
                                 (void *), &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_int) * num_elements, NULL, &error);
     test_error( error, "Creating test array failed" );
@@ -593,11 +593,11 @@
     free_mtdata(d); d = NULL;
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
                                 sizeof(cl_int) * num_elements,
                       , &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_int) * num_elements, NULL, &error);
     test_error( error, "Creating test array failed" );
diff --git a/test_conformance/api/test_mem_object_properties_queries.cpp b/test_conformance/api/test_mem_object_properties_queries.cpp
deleted file mode 100644
index 55300a6..0000000
--- a/test_conformance/api/test_mem_object_properties_queries.cpp
+++ /dev/null
@@ -1,362 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "testBase.h"
-#include "harness/propertyHelpers.h"
-#include "harness/typeWrappers.h"
-#include <vector>
-#include <algorithm>
-typedef enum
-    image,
-    image_with_properties,
-    buffer,
-    buffer_with_properties,
-    subbuffer,
-} test_type;
-struct test_data
-    test_type type;
-    std::vector<cl_mem_properties> properties;
-    std::string description;
-    cl_kernel kernel;
-static int create_object_and_check_properties(cl_context context,
-                                              clMemWrapper& test_object,
-                                              test_data test_case,
-                                              cl_mem_flags flags,
-                                              std::vector<cl_uint> local_data,
-                                              cl_uint size_x, cl_uint size_y)
-    cl_int error = CL_SUCCESS;
-    switch (test_case.type)
-    {
-        case image: {
-            cl_image_format format = { 0 };
-            format.image_channel_order = CL_RGBA;
-            format.image_channel_data_type = CL_UNSIGNED_INT32;
-            test_object = clCreateImage2D(context, flags, &format, size_x,
-                                          size_y, 0,, &error);
-            test_error(error, "clCreateImage2D failed");
-        }
-        break;
-        case image_with_properties: {
-            cl_image_format format = { 0 };
-            format.image_channel_order = CL_RGBA;
-            format.image_channel_data_type = CL_UNSIGNED_INT32;
-            cl_image_desc desc = { 0 };
-            desc.image_type = CL_MEM_OBJECT_IMAGE2D;
-            desc.image_width = size_x;
-            desc.image_height = size_y;
-            if ( == 0)
-            {
-                test_object = clCreateImageWithProperties(
-                    context, NULL, flags, &format, &desc,,
-                    &error);
-            }
-            else
-            {
-                test_object = clCreateImageWithProperties(
-                    context,, flags, &format, &desc,
-          , &error);
-            }
-            test_error(error, "clCreateImageWithProperties failed");
-        }
-        break;
-        case buffer: {
-            test_object = clCreateBuffer(context, flags,
-                                         local_data.size() * sizeof(cl_uint),
-                               , &error);
-            test_error(error, "clCreateBuffer failed");
-        }
-        case buffer_with_properties: {
-            if ( == 0)
-            {
-                test_object = clCreateBufferWithProperties(
-                    context, NULL, flags, local_data.size() * sizeof(cl_uint),
-          , &error);
-            }
-            else
-            {
-                test_object = clCreateBufferWithProperties(
-                    context,, flags,
-                    local_data.size() * sizeof(cl_uint),,
-                    &error);
-            }
-            test_error(error, "clCreateBufferWithProperties failed.");
-        }
-        break;
-        case subbuffer: {
-            clMemWrapper parent_object;
-            if ( == 0)
-            {
-                parent_object = clCreateBufferWithProperties(
-                    context, NULL, flags, local_data.size() * sizeof(cl_uint),
-          , &error);
-            }
-            else
-            {
-                parent_object = clCreateBufferWithProperties(
-                    context,, flags,
-                    local_data.size() * sizeof(cl_uint),,
-                    &error);
-            }
-            test_error(error, "clCreateBufferWithProperties failed.");
-            cl_mem_flags subbuffer_flags = flags
-            cl_buffer_region region = { 0 };
-            region.origin = 0;
-            region.size = local_data.size() * sizeof(cl_uint);
-            test_object = clCreateSubBuffer(parent_object, subbuffer_flags,
-                                            CL_BUFFER_CREATE_TYPE_REGION,
-                                            &region, &error);
-            test_error(error, "clCreateSubBuffer failed.");
-        }
-        break;
-        default: log_error("Unknown test type!"); return TEST_FAIL;
-    }
-    std::vector<cl_mem_properties> check_properties;
-    size_t set_size = 0;
-    error =
-        clGetMemObjectInfo(test_object, CL_MEM_PROPERTIES, 0, NULL, &set_size);
-    test_error(error,
-               "clGetMemObjectInfo failed asking for CL_MEM_PROPERTIES size.");
-    // Buffers, subbuffers, and images must return no properties.
-    if (test_case.type == buffer || test_case.type == subbuffer
-        || test_case.type == image)
-    {
-        if (set_size == 0)
-        {
-            return TEST_PASS;
-        }
-        else
-        {
-            log_error("Queried properties must have size equal to zero for "
-                      "buffers, subbuffers, and images.");
-            return TEST_FAIL;
-        }
-    }
-    if (set_size == 0 && == 0)
-    {
-        return TEST_PASS;
-    }
-    if (set_size != * sizeof(cl_mem_properties))
-    {
-        log_error("ERROR: CL_MEM_PROPERTIES size is %d, expected %d.\n",
-                  set_size,
-         * sizeof(cl_queue_properties));
-        return TEST_FAIL;
-    }
-    cl_uint number_of_props = set_size / sizeof(cl_mem_properties);
-    check_properties.resize(number_of_props);
-    error = clGetMemObjectInfo(test_object, CL_MEM_PROPERTIES, set_size,
-                     , NULL);
-    test_error(error,
-               "clGetMemObjectInfo failed asking for CL_MEM_PROPERTIES.");
-    error = compareProperties(check_properties,;
-    return error;
-static int run_test_query_properties(cl_context context, cl_command_queue queue,
-                                     test_data test_case)
-    int error = CL_SUCCESS;
-    log_info("\nTC description: %s\n", test_case.description.c_str());
-    clMemWrapper obj_src;
-    clMemWrapper obj_dst;
-    clEventWrapper event;
-    MTdata init_generator = init_genrand(gRandomSeed);
-    cl_mem_flags flags;
-    cl_uint size_x = 4;
-    cl_uint size_y = 4;
-    size_t size = size_x * size_y * 4;
-    size_t global_dim[2] = { size_x, size_y };
-    const size_t origin[3] = { 0, 0, 0 };
-    const size_t region[3] = { size_x, size_y, 1 };
-    std::vector<cl_uint> src_data(size);
-    std::vector<cl_uint> dst_data(size);
-    generate_random_data(kUInt, size, init_generator,;
-    generate_random_data(kUInt, size, init_generator,;
-    free_mtdata(init_generator);
-    init_generator = NULL;
-    error = create_object_and_check_properties(context, obj_src, test_case,
-                                               flags, src_data, size_x, size_y);
-    test_error(error, "create_object_and_check_properties obj_src failed.");
-    error = create_object_and_check_properties(context, obj_dst, test_case,
-                                               flags, dst_data, size_x, size_y);
-    test_error(error, "create_object_and_check_properties obj_dst failed.");
-    error = clSetKernelArg(test_case.kernel, 0, sizeof(obj_src), &obj_src);
-    test_error(error, "clSetKernelArg 0 failed.");
-    error = clSetKernelArg(test_case.kernel, 1, sizeof(obj_dst), &obj_dst);
-    test_error(error, "clSetKernelArg 1 failed.");
-    switch (test_case.type)
-    {
-        case image:
-        case image_with_properties: {
-            error = clEnqueueNDRangeKernel(queue, test_case.kernel, 2, NULL,
-                                           global_dim, NULL, 0, NULL, &event);
-            test_error(error, "clEnqueueNDRangeKernel failed.");
-            error = clWaitForEvents(1, &event);
-            test_error(error, "clWaitForEvents failed.");
-            error = clEnqueueReadImage(queue, obj_dst, CL_TRUE, origin, region,
-                                       0, 0,, 0, NULL, NULL);
-            test_error(error, "clEnqueueReadImage failed.");
-        }
-        break;
-        case buffer:
-        case buffer_with_properties:
-        case subbuffer: {
-            error = clEnqueueNDRangeKernel(queue, test_case.kernel, 1, NULL,
-                                           &size, NULL, 0, NULL, &event);
-            test_error(error, "clEnqueueNDRangeKernel failed.");
-            error = clWaitForEvents(1, &event);
-            test_error(error, "clWaitForEvents failed.");
-            error = clEnqueueReadBuffer(queue, obj_dst, CL_TRUE, 0,
-                                        dst_data.size() * sizeof(cl_uint),
-                              , 0, NULL, NULL);
-            test_error(error, "clEnqueueReadBuffer failed.");
-        }
-        break;
-        default: log_error("Unknown test type!"); return TEST_FAIL;
-    }
-    for (size_t i = 0; i < size; ++i)
-    {
-        if (dst_data[i] != src_data[i])
-        {
-            log_error("ERROR: Output results mismatch.");
-            return TEST_FAIL;
-        }
-    }
-    log_info("TC result: passed\n");
-    return TEST_PASS;
-int test_image_properties_queries(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements)
-    int error = CL_SUCCESS;
-    cl_bool supports_images = CL_TRUE;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT,
-                            sizeof(supports_images), &supports_images, NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed");
-    if (supports_images == CL_FALSE)
-    {
-        log_info("No image support on current device - skipped\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    const char* kernel_src = R"CLC(
-        __kernel void data_copy(read_only image2d_t src, write_only image2d_t dst)
-        {
-            int tid_x = get_global_id(0);
-            int tid_y = get_global_id(1);
-            int2 coords = (int2)(tid_x, tid_y);
-            uint4 val = read_imageui(src, coords);
-            write_imageui(dst, coords, val);
-        }
-        )CLC";
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        &kernel_src, "data_copy");
-    test_error(error, "create_single_kernel_helper failed");
-    std::vector<test_data> test_cases;
-    test_cases.push_back({ image, {}, "regular image", kernel });
-    test_cases.push_back(
-        { image_with_properties, { 0 }, "image, 0 properties", kernel });
-    test_cases.push_back(
-        { image_with_properties, {}, "image, NULL properties", kernel });
-    for (auto test_case : test_cases)
-    {
-        error |= run_test_query_properties(context, queue, test_case);
-    }
-    return error;
-int test_buffer_properties_queries(cl_device_id deviceID, cl_context context,
-                                   cl_command_queue queue, int num_elements)
-    int error = CL_SUCCESS;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    const char* kernel_src = R"CLC(
-        __kernel void data_copy(__global int *src, __global int *dst)
-        {
-            int  tid = get_global_id(0);
-            dst[tid] = src[tid];
-        }
-        )CLC";
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        &kernel_src, "data_copy");
-    test_error(error, "create_single_kernel_helper failed");
-    std::vector<test_data> test_cases;
-    test_cases.push_back({ buffer, {}, "regular buffer", kernel });
-    test_cases.push_back(
-        { buffer_with_properties, { 0 }, "buffer with 0 properties", kernel });
-    test_cases.push_back(
-        { buffer_with_properties, {}, "buffer with NULL properties", kernel });
-    test_cases.push_back(
-        { subbuffer, { 0 }, "subbuffer with 0 properties", kernel });
-    test_cases.push_back(
-        { subbuffer, {}, "subbuffer with NULL properties", kernel });
-    for (auto test_case : test_cases)
-    {
-        error |= run_test_query_properties(context, queue, test_case);
-    }
-    return error;
diff --git a/test_conformance/api/test_mem_objects.cpp b/test_conformance/api/test_mem_objects.cpp
index c29613f..b0dc99d 100644
--- a/test_conformance/api/test_mem_objects.cpp
+++ b/test_conformance/api/test_mem_objects.cpp
@@ -1,6 +1,6 @@
-// Copyright (c) 2020 The Khronos Group Inc.
+// Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -17,109 +17,92 @@
 static volatile cl_int sDestructorIndex;
-void CL_CALLBACK mem_destructor_callback(cl_mem memObject, void *userData)
+void CL_CALLBACK mem_destructor_callback( cl_mem memObject, void * userData )
-    int *userPtr = (int *)userData;
+    int * userPtr = (int *)userData;
-    // ordering of callbacks is guaranteed, meaning we don't need to do atomic
-    // operation here
+    // ordering of callbacks is guaranteed, meaning we don't need to do atomic operation here
     *userPtr = ++sDestructorIndex;
-int test_mem_object_destructor_callback_single(clMemWrapper &memObject)
+#ifndef ABS
+#define ABS( x ) ( ( x < 0 ) ? -x : x )
+int test_mem_object_destructor_callback_single( clMemWrapper &memObject )
     cl_int error;
+    int i;
     // Set up some variables to catch the order in which callbacks are called
-    volatile int callbackOrders[3] = { 0, 0, 0 };
+    volatile int callbackOrders[ 3 ] = { 0, 0, 0 };
     sDestructorIndex = 0;
     // Set up the callbacks
-    error = clSetMemObjectDestructorCallback(memObject, mem_destructor_callback,
-                                             (void *)&callbackOrders[0]);
-    test_error(error, "Unable to set destructor callback");
+    error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 0 ] );
+    test_error( error, "Unable to set destructor callback" );
-    error = clSetMemObjectDestructorCallback(memObject, mem_destructor_callback,
-                                             (void *)&callbackOrders[1]);
-    test_error(error, "Unable to set destructor callback");
+    error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 1 ] );
+    test_error( error, "Unable to set destructor callback" );
-    error = clSetMemObjectDestructorCallback(memObject, mem_destructor_callback,
-                                             (void *)&callbackOrders[2]);
-    test_error(error, "Unable to set destructor callback");
+    error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 2 ] );
+    test_error( error, "Unable to set destructor callback" );
     // Now release the buffer, which SHOULD call the callbacks
-    error = clReleaseMemObject(memObject);
-    test_error(error, "Unable to release test buffer");
+    error = clReleaseMemObject( memObject );
+    test_error( error, "Unable to release test buffer" );
-    // Note: since we manually released the mem wrapper, we need to set it to
-    // NULL to prevent a double-release
+    // Note: since we manually released the mem wrapper, we need to set it to NULL to prevent a double-release
     memObject = NULL;
     // At this point, all three callbacks should have already been called
     int numErrors = 0;
-    for (int i = 0; i < 3; i++)
+    for(  i = 0; i < 3; i++ )
-        // Spin waiting for the release to finish.  If you don't call the
-        // mem_destructor_callback, you will not pass the test.  bugzilla 6316
-        log_info("\tWaiting for callback %d...\n", i);
-        int wait = 0;
-        while (0 == callbackOrders[i])
-        {
-            usleep(100000); // 1/10th second
-            if (++wait >= 10 * 10)
-            {
-                log_error("\tERROR: Callback %d was not called within 10 "
-                          "seconds!  Assuming failure.\n",
-                          i + 1);
-                numErrors++;
-                break;
-            }
-        }
+        // Spin waiting for the release to finish.  If you don't call the mem_destructor_callback, you will not
+        // pass the test.  bugzilla 6316
+        while( 0 == callbackOrders[i] )
+        {}
-        if (callbackOrders[i] != 3 - i)
+        if( ABS( callbackOrders[ i ] ) != 3-i )
-            log_error("\tERROR: Callback %d was called in the wrong order! "
-                      "(Was called order %d, should have been order %d)\n",
-                      i + 1, callbackOrders[i], 3 - i);
+            log_error( "\tERROR: Callback %d was called in the wrong order! (Was called order %d, should have been order %d)\n",
+                      i+1, ABS( callbackOrders[ i ] ), i );
-    return (numErrors > 0) ? TEST_FAIL : TEST_PASS;
+    return ( numErrors > 0 ) ? -1 : 0;
-int test_mem_object_destructor_callback(cl_device_id deviceID,
-                                        cl_context context,
-                                        cl_command_queue queue,
-                                        int num_elements)
+int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
     clMemWrapper testBuffer, testImage;
     cl_int error;
     // Create a buffer and an image to test callbacks against
-    testBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, 1024, NULL, &error);
-    test_error(error, "Unable to create testing buffer");
+    testBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, 1024, NULL, &error );
+    test_error( error, "Unable to create testing buffer" );
-    if (test_mem_object_destructor_callback_single(testBuffer) != TEST_PASS)
+    if( test_mem_object_destructor_callback_single( testBuffer ) != 0 )
-        log_error("ERROR: Destructor callbacks for buffer object FAILED\n");
-        return TEST_FAIL;
+        log_error( "ERROR: Destructor callbacks for buffer object FAILED\n" );
+        return -1;
-    if (checkForImageSupport(deviceID) == 0)
+    if( checkForImageSupport( deviceID ) == 0 )
         cl_image_format imageFormat = { CL_RGBA, CL_SIGNED_INT8 };
-        testImage = create_image_2d(context, CL_MEM_READ_ONLY, &imageFormat, 16,
-                                    16, 0, NULL, &error);
-        test_error(error, "Unable to create testing image");
+        testImage = create_image_2d( context, CL_MEM_READ_ONLY, &imageFormat, 16, 16, 0, NULL, &error );
+        test_error( error, "Unable to create testing image" );
-        if (test_mem_object_destructor_callback_single(testImage) != TEST_PASS)
+        if( test_mem_object_destructor_callback_single( testImage ) != 0 )
-            log_error("ERROR: Destructor callbacks for image object FAILED\n");
-            return TEST_FAIL;
+            log_error( "ERROR: Destructor callbacks for image object FAILED\n" );
+            return -1;
-    return TEST_PASS;
+    return 0;
diff --git a/test_conformance/api/test_min_image_formats.cpp b/test_conformance/api/test_min_image_formats.cpp
deleted file mode 100644
index f6a3546..0000000
--- a/test_conformance/api/test_min_image_formats.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "testBase.h"
-int test_min_image_formats(cl_device_id device, cl_context context,
-                           cl_command_queue queue, int num_elements)
-    int missingFormats = 0;
-    cl_int error = CL_SUCCESS;
-    Version version = get_device_cl_version(device);
-    cl_bool supports_images = CL_FALSE;
-    error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
-                            sizeof(supports_images), &supports_images, NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed");
-    if (supports_images == CL_FALSE)
-    {
-        log_info("No image support on current device - skipped\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-    const cl_mem_object_type image_types[] = {
-    };
-    const cl_mem_flags mem_flags[] = {
-        CL_MEM_READ_ONLY,
-    };
-    cl_bool supports_read_write_images = CL_FALSE;
-    if (version >= Version(3, 0))
-    {
-        cl_uint maxReadWriteImageArgs = 0;
-        error = clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
-                                sizeof(maxReadWriteImageArgs),
-                                &maxReadWriteImageArgs, NULL);
-        test_error(error,
-                   "Unable to query "
-                   "CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS");
-        // read-write images are supported if MAX_READ_WRITE_IMAGE_ARGS is
-        // nonzero
-        supports_read_write_images =
-            maxReadWriteImageArgs != 0 ? CL_TRUE : CL_FALSE;
-    }
-    else if (version >= Version(2, 0))
-    {
-        // read-write images are required for OpenCL 2.x
-        supports_read_write_images = CL_TRUE;
-    }
-    int supports_3D_image_writes =
-        is_extension_available(device, "cl_khr_3d_image_writes");
-    for (int t = 0; t < ARRAY_SIZE(image_types); t++)
-    {
-        const cl_mem_object_type type = image_types[t];
-        log_info("    testing %s...\n", convert_image_type_to_string(type));
-        for (int f = 0; f < ARRAY_SIZE(mem_flags); f++)
-        {
-            const cl_mem_flags flags = mem_flags[f];
-            const char* testTypeString = flags == CL_MEM_READ_ONLY
-                ? "read-only"
-                : flags == CL_MEM_WRITE_ONLY
-                    ? "write only"
-                    : flags == CL_MEM_KERNEL_READ_AND_WRITE ? "read and write"
-                                                            : "unknown???";
-            if (flags == CL_MEM_KERNEL_READ_AND_WRITE
-                && !supports_read_write_images)
-            {
-                continue;
-            }
-            if (type == CL_MEM_OBJECT_IMAGE3D && flags != CL_MEM_READ_ONLY
-                && !supports_3D_image_writes)
-            {
-                continue;
-            }
-            cl_uint numImageFormats = 0;
-            error = clGetSupportedImageFormats(context, flags, type, 0, NULL,
-                                               &numImageFormats);
-            test_error(error, "Unable to query number of image formats");
-            std::vector<cl_image_format> supportedFormats(numImageFormats);
-            if (numImageFormats != 0)
-            {
-                error = clGetSupportedImageFormats(
-                    context, flags, type, supportedFormats.size(),
-          , NULL);
-                test_error(error, "Unable to query image formats");
-            }
-            std::vector<cl_image_format> requiredFormats;
-            build_required_image_formats(flags, type, device, requiredFormats);
-            for (auto& format : requiredFormats)
-            {
-                if (!find_format(,
-                                 supportedFormats.size(), &format))
-                {
-                    log_error(
-                        "Missing required %s format %s + %s.\n", testTypeString,
-                        GetChannelOrderName(format.image_channel_order),
-                        GetChannelTypeName(format.image_channel_data_type));
-                    ++missingFormats;
-                }
-            }
-        }
-    }
-    return missingFormats == 0 ? TEST_PASS : TEST_FAIL;
diff --git a/test_conformance/api/test_null_buffer_arg.cpp b/test_conformance/api/test_null_buffer_arg.cpp
index d412d4e..ba43f18 100644
--- a/test_conformance/api/test_null_buffer_arg.cpp
+++ b/test_conformance/api/test_null_buffer_arg.cpp
@@ -157,13 +157,14 @@
     // prep kernel:
     if (gIsEmbedded)
-        status = create_single_kernel_helper(context, &program, &kernel, 1,
-                                             &kernel_string, "test_kernel");
+        status = create_single_kernel_helper(context, &program, NULL, 1, &kernel_string, NULL);
-        status = create_single_kernel_helper(
-            context, &program, &kernel, 1, &kernel_string_long, "test_kernel");
+        status = create_single_kernel_helper(context, &program, NULL, 1, &kernel_string_long, NULL);
-    test_error(status, "Unable to create kernel");
+    test_error(status, "Unable to build test program");
+    kernel = clCreateKernel(program, "test_kernel", &status);
+    test_error(status, "CreateKernel failed.");
     cl_mem dev_src = clCreateBuffer(context, CL_MEM_READ_ONLY, NITEMS*sizeof(cl_float),
         NULL, NULL);
diff --git a/test_conformance/api/test_pipe_properties_queries.cpp b/test_conformance/api/test_pipe_properties_queries.cpp
deleted file mode 100644
index db91895..0000000
--- a/test_conformance/api/test_pipe_properties_queries.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "testBase.h"
-#include "harness/typeWrappers.h"
-#include <vector>
-struct test_query_pipe_properties_data
-    std::vector<cl_pipe_properties> properties;
-    std::string description;
-static int create_pipe_and_check_array_properties(
-    cl_context context, const test_query_pipe_properties_data& test_case)
-    log_info("TC description: %s\n", test_case.description.c_str());
-    cl_int error = CL_SUCCESS;
-    clMemWrapper test_pipe;
-    if ( > 0)
-    {
-        test_pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, 4, 4,
-                       , &error);
-        test_error(error, "clCreatePipe failed");
-    }
-    else
-    {
-        test_pipe =
-            clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, 4, 4, NULL, &error);
-        test_error(error, "clCreatePipe failed");
-    }
-    std::vector<cl_pipe_properties> check_properties;
-    size_t set_size = 0;
-    error = clGetPipeInfo(test_pipe, CL_PIPE_PROPERTIES, 0, NULL, &set_size);
-    test_error(error,
-               "clGetPipeInfo failed asking for "
-               "CL_PIPE_PROPERTIES size.");
-    if (set_size == 0 && == 0)
-    {
-        return TEST_PASS;
-    }
-    if (set_size != * sizeof(cl_pipe_properties))
-    {
-        log_error("ERROR: CL_PIPE_PROPERTIES size is %d, expected %d.\n",
-                  set_size,
-         * sizeof(cl_pipe_properties));
-        return TEST_FAIL;
-    }
-    log_error("Unexpected test case size.  This test needs to be updated to "
-              "compare pipe properties.\n");
-    return TEST_FAIL;
-int test_pipe_properties_queries(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements)
-    cl_int error = CL_SUCCESS;
-    cl_bool pipeSupport = CL_FALSE;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_PIPE_SUPPORT,
-                            sizeof(pipeSupport), &pipeSupport, NULL);
-    test_error(error, "Unable to query CL_DEVICE_PIPE_SUPPORT");
-    if (pipeSupport == CL_FALSE)
-    {
-        return TEST_SKIPPED_ITSELF;
-    }
-    int result = TEST_PASS;
-    std::vector<test_query_pipe_properties_data> test_cases;
-    test_cases.push_back({ {}, "NULL properties" });
-    for (auto test_case : test_cases)
-    {
-        result |= create_pipe_and_check_array_properties(context, test_case);
-    }
-    return result;
diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp
index 469a193..0acbe40 100644
--- a/test_conformance/api/test_queries.cpp
+++ b/test_conformance/api/test_queries.cpp
@@ -15,10 +15,8 @@
 #include "testBase.h"
 #include "harness/imageHelpers.h"
-#include "harness/propertyHelpers.h"
 #include <stdlib.h>
 #include <ctype.h>
-#include <algorithm>
 int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
@@ -131,337 +129,158 @@
     return 0;
-template <typename T>
-int sampler_param_test(cl_sampler sampler, cl_sampler_info param_name,
-                       T expected, const char *name)
+int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-    size_t size;
-    T val;
-    int error = clGetSamplerInfo(sampler, param_name, sizeof(val), &val, &size);
-    test_error(error, "Unable to get sampler info");
-    if (val != expected)
-    {
-        test_fail("ERROR: Sampler %s did not validate!\n", name);
-    }
-    if (size != sizeof(val))
-    {
-        test_fail("ERROR: Returned size of sampler %s does not validate! "
-                  "(expected %d, got %d)\n",
-                  name, (int)sizeof(val), (int)size);
-    }
-    return 0;
-static cl_int normalized_coord_values[] = { CL_TRUE, CL_FALSE };
-static cl_addressing_mode addressing_mode_values[] = {
-static cl_filter_mode filter_mode_values[] = { CL_FILTER_NEAREST,
-                                               CL_FILTER_LINEAR };
-int test_sampler_params(cl_device_id deviceID, cl_context context,
-                        bool is_compatibility, int norm_coord_num,
-                        int addr_mod_num, int filt_mod_num)
-    cl_uint refCount;
-    size_t size;
     int error;
+    size_t size;
-    clSamplerWrapper sampler;
     cl_sampler_properties properties[] = {
-        normalized_coord_values[norm_coord_num],
-        addressing_mode_values[addr_mod_num],
-        filter_mode_values[filt_mod_num],
-        0
-    };
+        0 };
+    clSamplerWrapper sampler = clCreateSamplerWithProperties(context, properties, &error);
+    test_error( error, "Unable to create sampler to test with" );
-    if (is_compatibility)
+    cl_uint refCount;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
+    test_error( error, "Unable to get sampler ref count" );
+    if( size != sizeof( refCount ) )
-        sampler =
-            clCreateSampler(context, normalized_coord_values[norm_coord_num],
-                            addressing_mode_values[addr_mod_num],
-                            filter_mode_values[filt_mod_num], &error);
-        test_error(error, "Unable to create sampler to test with");
-    }
-    else
-    {
-        sampler = clCreateSamplerWithProperties(context, properties, &error);
-        test_error(error, "Unable to create sampler to test with");
+        log_error( "ERROR: Returned size of sampler refcount does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size );
+        return -1;
-    error = clGetSamplerInfo(sampler, CL_SAMPLER_REFERENCE_COUNT,
-                             sizeof(refCount), &refCount, &size);
-    test_error(error, "Unable to get sampler ref count");
-    test_assert_error(size == sizeof(refCount),
-                      "Returned size of sampler refcount does not validate!\n");
-    error = sampler_param_test(sampler, CL_SAMPLER_CONTEXT, context, "context");
-    test_error(error, "param checking failed");
-    error = sampler_param_test(sampler, CL_SAMPLER_ADDRESSING_MODE,
-                               addressing_mode_values[addr_mod_num],
-                               "addressing mode");
-    test_error(error, "param checking failed");
-    error = sampler_param_test(sampler, CL_SAMPLER_FILTER_MODE,
-                               filter_mode_values[filt_mod_num], "filter mode");
-    test_error(error, "param checking failed");
-    error = sampler_param_test(sampler, CL_SAMPLER_NORMALIZED_COORDS,
-                               normalized_coord_values[norm_coord_num],
-                               "normalized coords");
-    test_error(error, "param checking failed");
-    Version version = get_device_cl_version(deviceID);
-    if (version >= Version(3, 0))
+    cl_context otherCtx;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_CONTEXT, sizeof( otherCtx ), &otherCtx, &size );
+    test_error( error, "Unable to get sampler context" );
+    if( otherCtx != context )
-        std::vector<cl_sampler_properties> test_properties(
-            properties, properties + ARRAY_SIZE(properties));
-        std::vector<cl_sampler_properties> check_properties;
-        size_t set_size;
-        error = clGetSamplerInfo(sampler, CL_SAMPLER_PROPERTIES, 0, NULL,
-                                 &set_size);
-        test_error(
-            error,
-            "clGetSamplerInfo failed asking for CL_SAMPLER_PROPERTIES size.");
-        if (is_compatibility)
-        {
-            if (set_size != 0)
-            {
-                log_error(
-                    "ERROR: CL_SAMPLER_PROPERTIES size is %d, expected 0\n",
-                    set_size);
-                return TEST_FAIL;
-            }
-        }
-        else
-        {
-            if (set_size
-                != test_properties.size() * sizeof(cl_sampler_properties))
-            {
-                log_error(
-                    "ERROR: CL_SAMPLER_PROPERTIES size is %d, expected %d.\n",
-                    set_size,
-                    test_properties.size() * sizeof(cl_sampler_properties));
-                return TEST_FAIL;
-            }
-            cl_uint number_of_props = set_size / sizeof(cl_sampler_properties);
-            check_properties.resize(number_of_props);
-            error = clGetSamplerInfo(sampler, CL_SAMPLER_PROPERTIES, set_size,
-                           , 0);
-            test_error(
-                error,
-                "clGetSamplerInfo failed asking for CL_SAMPLER_PROPERTIES.");
-            error = compareProperties(check_properties, test_properties);
-            test_error(error, "checkProperties mismatch.");
-        }
+        log_error( "ERROR: Sampler context does not validate! (expected %p, got %p)\n", context, otherCtx );
+        return -1;
-    return 0;
-int get_sampler_info_params(cl_device_id deviceID, cl_context context,
-                            bool is_compatibility)
-    for (int norm_coord_num = 0;
-         norm_coord_num < ARRAY_SIZE(normalized_coord_values); norm_coord_num++)
+    if( size != sizeof( otherCtx ) )
-        for (int addr_mod_num = 0;
-             addr_mod_num < ARRAY_SIZE(addressing_mode_values); addr_mod_num++)
-        {
-            if ((normalized_coord_values[norm_coord_num] == CL_FALSE)
-                && ((addressing_mode_values[addr_mod_num] == CL_ADDRESS_REPEAT)
-                    || (addressing_mode_values[addr_mod_num]
-                        == CL_ADDRESS_MIRRORED_REPEAT)))
-            {
-                continue;
-            }
-            for (int filt_mod_num = 0;
-                 filt_mod_num < ARRAY_SIZE(filter_mode_values); filt_mod_num++)
-            {
-                int err = test_sampler_params(deviceID, context,
-                                              is_compatibility, norm_coord_num,
-                                              addr_mod_num, filt_mod_num);
-                test_error(err, "testing clGetSamplerInfo params failed");
-            }
-        }
+        log_error( "ERROR: Returned size of sampler context does not validate! (expected %d, got %d)\n", (int)sizeof( otherCtx ), (int)size );
+        return -1;
-    return 0;
-int test_get_sampler_info(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, int num_elements)
-    int error;
-    error = get_sampler_info_params(deviceID, context, false);
-    test_error(error, "Test Failed");
+    cl_addressing_mode mode;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_ADDRESSING_MODE, sizeof( mode ), &mode, &size );
+    test_error( error, "Unable to get sampler addressing mode" );
+    if( mode != CL_ADDRESS_CLAMP )
+    {
+        log_error( "ERROR: Sampler addressing mode does not validate! (expected %d, got %d)\n", (int)CL_ADDRESS_CLAMP, (int)mode );
+        return -1;
+    }
+    if( size != sizeof( mode ) )
+    {
+        log_error( "ERROR: Returned size of sampler addressing mode does not validate! (expected %d, got %d)\n", (int)sizeof( mode ), (int)size );
+        return -1;
+    }
+    cl_filter_mode fmode;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_FILTER_MODE, sizeof( fmode ), &fmode, &size );
+    test_error( error, "Unable to get sampler filter mode" );
+    if( fmode != CL_FILTER_LINEAR )
+    {
+        log_error( "ERROR: Sampler filter mode does not validate! (expected %d, got %d)\n", (int)CL_FILTER_LINEAR, (int)fmode );
+        return -1;
+    }
+    if( size != sizeof( fmode ) )
+    {
+        log_error( "ERROR: Returned size of sampler filter mode does not validate! (expected %d, got %d)\n", (int)sizeof( fmode ), (int)size );
+        return -1;
+    }
+    cl_int norm;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_NORMALIZED_COORDS, sizeof( norm ), &norm, &size );
+    test_error( error, "Unable to get sampler normalized flag" );
+    if( norm != CL_TRUE )
+    {
+        log_error( "ERROR: Sampler normalized flag does not validate! (expected %d, got %d)\n", (int)CL_TRUE, (int)norm );
+        return -1;
+    }
+    if( size != sizeof( norm ) )
+    {
+        log_error( "ERROR: Returned size of sampler normalized flag does not validate! (expected %d, got %d)\n", (int)sizeof( norm ), (int)size );
+        return -1;
+    }
     return 0;
-int test_get_sampler_info_compatibility(cl_device_id deviceID,
-                                        cl_context context,
-                                        cl_command_queue queue,
-                                        int num_elements)
-    int error;
-    error = get_sampler_info_params(deviceID, context, true);
-    test_error(error, "Test Failed");
-    return 0;
+#define TEST_COMMAND_QUEUE_PARAM( queue, paramName, val, expected, name, type, cast )    \
+error = clGetCommandQueueInfo( queue, paramName, sizeof( val ), &val, &size );        \
+test_error( error, "Unable to get command queue " name );                            \
+if( val != expected )                                                                \
+{                                                                                    \
+log_error( "ERROR: Command queue " name " did not validate! (expected " type ", got " type ")\n", (cast)expected, (cast)val );    \
+return -1;                                                                        \
+}            \
+if( size != sizeof( val ) )                \
+{                                        \
+log_error( "ERROR: Returned size of command queue " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size );    \
+return -1;    \
-template <typename T>
-int command_queue_param_test(cl_command_queue queue,
-                             cl_command_queue_info param_name, T expected,
-                             const char *name)
-    size_t size;
-    T val;
-    int error =
-        clGetCommandQueueInfo(queue, param_name, sizeof(val), &val, &size);
-    test_error(error, "Unable to get command queue info");
-    if (val != expected)
-    {
-        test_fail("ERROR: Command queue %s did not validate!\n", name);
-    }
-    if (size != sizeof(val))
-    {
-        test_fail("ERROR: Returned size of command queue %s does not validate! "
-                  "(expected %d, got %d)\n",
-                  name, (int)sizeof(val), (int)size);
-    }
-    return 0;
-static cl_command_queue_properties property_options[] = {
-    0,
-int check_get_command_queue_info_params(cl_device_id deviceID,
-                                        cl_context context,
-                                        bool is_compatibility)
+int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
     int error;
     size_t size;
-    cl_queue_properties host_queue_props, device_queue_props;
-    cl_queue_properties queue_props[] = { CL_QUEUE_PROPERTIES, 0, 0 };
+    cl_queue_properties device_props;
+    cl_queue_properties queue_props[] = {CL_QUEUE_PROPERTIES,0,0};
-                    sizeof(host_queue_props), &host_queue_props, NULL);
-    log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n",
-             (int)host_queue_props);
-                    sizeof(device_queue_props), &device_queue_props, NULL);
-    log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n",
-             (int)device_queue_props);
+    clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof(device_props), &device_props, NULL);
+    log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n", (int)device_props);
-    auto version = get_device_cl_version(deviceID);
+    // Mask off vendor extension properties.  Only test standard OpenCL properties
-    // Are on device queues supported
-    bool on_device_supported =
-        (version >= Version(2, 0) && version < Version(3, 0))
-        || (version >= Version(3, 0) && device_queue_props != 0);
+    queue_props[1] = device_props;
+    clCommandQueueWrapper queue = clCreateCommandQueueWithProperties( context, deviceID, &queue_props[0], &error );
+    test_error( error, "Unable to create command queue to test with" );
-    int num_test_options = MIN_NUM_COMMAND_QUEUE_PROPERTIES;
-    if (host_queue_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
+    cl_uint refCount;
+    error = clGetCommandQueueInfo( queue, CL_QUEUE_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
+    test_error( error, "Unable to get command queue reference count" );
+    if( size != sizeof( refCount ) )
-        // Test out-of-order queues properties if supported
-        num_test_options = OOO_NUM_COMMAND_QUEUE_PROPERTIES;
-    }
-    if (on_device_supported && !is_compatibility)
-    {
-        // Test queue on device if supported (in this case out-of-order must
-        // also be supported)
-        num_test_options = ARRAY_SIZE(property_options);
+        log_error( "ERROR: Returned size of command queue reference count does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size );
+        return -1;
-    for (int i = 0; i < num_test_options; i++)
-    {
-        queue_props[1] = property_options[i];
-        clCommandQueueWrapper queue;
+    cl_context otherCtx;
+    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_CONTEXT, otherCtx, context, "context", "%p", cl_context )
-        if (is_compatibility)
-        {
-            queue =
-                clCreateCommandQueue(context, deviceID, queue_props[1], &error);
-            test_error(error, "Unable to create command queue to test with");
-        }
-        else
-        {
-            queue = clCreateCommandQueueWithProperties(context, deviceID,
-                                                       &queue_props[0], &error);
-            test_error(error, "Unable to create command queue to test with");
-        }
+    cl_device_id otherDevice;
+    error = clGetCommandQueueInfo( queue, CL_QUEUE_DEVICE, sizeof(otherDevice), &otherDevice, &size);
+    test_error(error, "clGetCommandQueue failed.");
-        cl_uint refCount;
-        error = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT,
-                                      sizeof(refCount), &refCount, &size);
-        test_error(error, "Unable to get command queue reference count");
-        test_assert_error(size == sizeof(refCount),
-                          "Returned size of command queue reference count does "
-                          "not validate!\n");
-        error = command_queue_param_test(queue, CL_QUEUE_CONTEXT, context,
-                                         "context");
-        test_error(error, "param checking failed");
-        error = command_queue_param_test(queue, CL_QUEUE_DEVICE, deviceID,
-                                         "deviceID");
-        test_error(error, "param checking failed");
-        error = command_queue_param_test(queue, CL_QUEUE_PROPERTIES,
-                                         queue_props[1], "properties");
-        test_error(error, "param checking failed");
+    if (size != sizeof(cl_device_id)) {
+        log_error( " ERROR: Returned size of command queue CL_QUEUE_DEVICE does not validate! (expected %d, got %d)\n", (int)sizeof( otherDevice ), (int)size );
+        return -1;
-    return 0;
-int test_get_command_queue_info(cl_device_id deviceID, cl_context context,
-                                cl_command_queue ignoreQueue, int num_elements)
-    int error = check_get_command_queue_info_params(deviceID, context, false);
-    test_error(error, "Test Failed");
-    return 0;
-int test_get_command_queue_info_compatibility(cl_device_id deviceID,
-                                              cl_context context,
-                                              cl_command_queue ignoreQueue,
-                                              int num_elements)
-    int error = check_get_command_queue_info_params(deviceID, context, true);
-    test_error(error, "Test Failed");
+    /* Since the device IDs are opaque types we check the CL_DEVICE_VENDOR_ID which is unique for identical hardware. */
+    cl_uint otherDevice_vid, deviceID_vid;
+    error = clGetDeviceInfo(otherDevice, CL_DEVICE_VENDOR_ID, sizeof(otherDevice_vid), &otherDevice_vid, NULL );
+    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_VENDOR_ID, sizeof(deviceID_vid), &deviceID_vid, NULL );
+    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
+    if( otherDevice_vid != deviceID_vid )
+    {
+        log_error( "ERROR: Incorrect device returned for queue! (Expected vendor ID 0x%x, got 0x%x)\n", deviceID_vid, otherDevice_vid );
+        return -1;
+    }
+    cl_command_queue_properties props;
+    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_PROPERTIES, props, (unsigned int)( device_props ), "properties", "%d", unsigned int )
     return 0;
@@ -581,8 +400,7 @@
     // extensions can support double but may not support cl_khr_fp64, which implies math library support.
     cl_uint baseAddrAlign;
-                      "base address alignment", "%d bits", int)
+    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, baseAddrAlign, "base address alignment", "%d bits", int )

     cl_uint maxDataAlign;
     TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, maxDataAlign, "min data type alignment", "%d bytes", int )
diff --git a/test_conformance/api/test_queries_compatibility.cpp b/test_conformance/api/test_queries_compatibility.cpp
new file mode 100644
index 0000000..f65c5db
--- /dev/null
+++ b/test_conformance/api/test_queries_compatibility.cpp
@@ -0,0 +1,164 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "testBase.h"
+#include "harness/imageHelpers.h"
+#include <stdlib.h>
+#include <ctype.h>
+int test_get_sampler_info_compatibility(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+    int error;
+    size_t size;
+    clSamplerWrapper sampler = clCreateSampler( context, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_LINEAR, &error );
+    test_error( error, "Unable to create sampler to test with" );
+    cl_uint refCount;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
+    test_error( error, "Unable to get sampler ref count" );
+    if( size != sizeof( refCount ) )
+    {
+        log_error( "ERROR: Returned size of sampler refcount does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size );
+        return -1;
+    }
+    cl_context otherCtx;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_CONTEXT, sizeof( otherCtx ), &otherCtx, &size );
+    test_error( error, "Unable to get sampler context" );
+    if( otherCtx != context )
+    {
+        log_error( "ERROR: Sampler context does not validate! (expected %p, got %p)\n", context, otherCtx );
+        return -1;
+    }
+    if( size != sizeof( otherCtx ) )
+    {
+        log_error( "ERROR: Returned size of sampler context does not validate! (expected %d, got %d)\n", (int)sizeof( otherCtx ), (int)size );
+        return -1;
+    }
+    cl_addressing_mode mode;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_ADDRESSING_MODE, sizeof( mode ), &mode, &size );
+    test_error( error, "Unable to get sampler addressing mode" );
+    if( mode != CL_ADDRESS_CLAMP )
+    {
+        log_error( "ERROR: Sampler addressing mode does not validate! (expected %d, got %d)\n", (int)CL_ADDRESS_CLAMP, (int)mode );
+        return -1;
+    }
+    if( size != sizeof( mode ) )
+    {
+        log_error( "ERROR: Returned size of sampler addressing mode does not validate! (expected %d, got %d)\n", (int)sizeof( mode ), (int)size );
+        return -1;
+    }
+    cl_filter_mode fmode;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_FILTER_MODE, sizeof( fmode ), &fmode, &size );
+    test_error( error, "Unable to get sampler filter mode" );
+    if( fmode != CL_FILTER_LINEAR )
+    {
+        log_error( "ERROR: Sampler filter mode does not validate! (expected %d, got %d)\n", (int)CL_FILTER_LINEAR, (int)fmode );
+        return -1;
+    }
+    if( size != sizeof( fmode ) )
+    {
+        log_error( "ERROR: Returned size of sampler filter mode does not validate! (expected %d, got %d)\n", (int)sizeof( fmode ), (int)size );
+        return -1;
+    }
+    cl_int norm;
+    error = clGetSamplerInfo( sampler, CL_SAMPLER_NORMALIZED_COORDS, sizeof( norm ), &norm, &size );
+    test_error( error, "Unable to get sampler normalized flag" );
+    if( norm != CL_TRUE )
+    {
+        log_error( "ERROR: Sampler normalized flag does not validate! (expected %d, got %d)\n", (int)CL_TRUE, (int)norm );
+        return -1;
+    }
+    if( size != sizeof( norm ) )
+    {
+        log_error( "ERROR: Returned size of sampler normalized flag does not validate! (expected %d, got %d)\n", (int)sizeof( norm ), (int)size );
+        return -1;
+    }
+    return 0;
+#define TEST_COMMAND_QUEUE_PARAM( queue, paramName, val, expected, name, type, cast )    \
+error = clGetCommandQueueInfo( queue, paramName, sizeof( val ), &val, &size );        \
+test_error( error, "Unable to get command queue " name );                            \
+if( val != expected )                                                                \
+{                                                                                    \
+log_error( "ERROR: Command queue " name " did not validate! (expected " type ", got " type ")\n", (cast)expected, (cast)val );    \
+return -1;                                                                        \
+}            \
+if( size != sizeof( val ) )                \
+{                                        \
+log_error( "ERROR: Returned size of command queue " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size );    \
+return -1;    \
+int test_get_command_queue_info_compatibility(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
+    int error;
+    size_t size;
+    cl_command_queue_properties device_props;
+    clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, NULL);
+    log_info("CL_DEVICE_QUEUE_PROPERTIES is %d\n", (int)device_props);
+    clCommandQueueWrapper queue = clCreateCommandQueue( context, deviceID, device_props, &error );
+    test_error( error, "Unable to create command queue to test with" );
+    cl_uint refCount;
+    error = clGetCommandQueueInfo( queue, CL_QUEUE_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
+    test_error( error, "Unable to get command queue reference count" );
+    if( size != sizeof( refCount ) )
+    {
+        log_error( "ERROR: Returned size of command queue reference count does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size );
+        return -1;
+    }
+    cl_context otherCtx;
+    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_CONTEXT, otherCtx, context, "context", "%p", cl_context )
+    cl_device_id otherDevice;
+    error = clGetCommandQueueInfo( queue, CL_QUEUE_DEVICE, sizeof(otherDevice), &otherDevice, &size);
+    test_error(error, "clGetCommandQueue failed.");
+    if (size != sizeof(cl_device_id)) {
+        log_error( " ERROR: Returned size of command queue CL_QUEUE_DEVICE does not validate! (expected %d, got %d)\n", (int)sizeof( otherDevice ), (int)size );
+        return -1;
+    }
+    /* Since the device IDs are opaque types we check the CL_DEVICE_VENDOR_ID which is unique for identical hardware. */
+    cl_uint otherDevice_vid, deviceID_vid;
+    error = clGetDeviceInfo(otherDevice, CL_DEVICE_VENDOR_ID, sizeof(otherDevice_vid), &otherDevice_vid, NULL );
+    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_VENDOR_ID, sizeof(deviceID_vid), &deviceID_vid, NULL );
+    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
+    if( otherDevice_vid != deviceID_vid )
+    {
+        log_error( "ERROR: Incorrect device returned for queue! (Expected vendor ID 0x%x, got 0x%x)\n", deviceID_vid, otherDevice_vid );
+        return -1;
+    }
+    cl_command_queue_properties props;
+    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_PROPERTIES, props, (unsigned int)( device_props ), "properties", "%d", unsigned int )
+    return 0;
diff --git a/test_conformance/api/test_queue.cpp b/test_conformance/api/test_queue.cpp
deleted file mode 100644
index 27ed5f0..0000000
--- a/test_conformance/api/test_queue.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "testBase.h"
-#include "harness/typeWrappers.h"
-int test_queue_flush_on_release(cl_device_id deviceID, cl_context context,
-                                cl_command_queue defaultQueue, int num_elements)
-    cl_int err;
-    // Create a command queue
-    cl_command_queue queue = clCreateCommandQueue(context, deviceID, 0, &err);
-    test_error(err, "Could not create command queue");
-    // Create a kernel
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    const char *source = "void kernel test(){}";
-    err = create_single_kernel_helper(context, &program, &kernel, 1, &source,
-                                      "test");
-    test_error(err, "Could not create kernel");
-    // Enqueue the kernel
-    size_t gws = 1;
-    clEventWrapper event;
-    err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &gws, nullptr, 0,
-                                 nullptr, &event);
-    test_error(err, "Could not enqueue kernel");
-    // Release the queue
-    err = clReleaseCommandQueue(queue);
-    // Wait for kernel to execute since the queue must flush on release
-    bool success = poll_until(2000, 50, [&event]() {
-        cl_int status;
-        cl_int err = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
-                                    sizeof(cl_int), &status, nullptr);
-        if ((err != CL_SUCCESS) || (status != CL_COMPLETE))
-        {
-            return false;
-        }
-        return true;
-    });
-    return success ? TEST_PASS : TEST_FAIL;
diff --git a/test_conformance/api/test_queue_properties_queries.cpp b/test_conformance/api/test_queue_properties_queries.cpp
deleted file mode 100644
index 843fa84..0000000
--- a/test_conformance/api/test_queue_properties_queries.cpp
+++ /dev/null
@@ -1,269 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "testBase.h"
-#include "harness/propertyHelpers.h"
-#include "harness/typeWrappers.h"
-#include <vector>
-#include <algorithm>
-struct test_queue_array_properties_data
-    std::vector<cl_queue_properties> properties;
-    std::string description;
-int verify_if_properties_supported(
-    cl_device_id deviceID, cl_command_queue_properties requested_bitfield,
-    cl_uint requested_size)
-    int error = CL_SUCCESS;
-    bool on_host_queue = true;
-    if (requested_bitfield & CL_QUEUE_ON_DEVICE)
-    {
-        on_host_queue = false;
-        if (requested_size > 0)
-        {
-            cl_uint max_queue_size = 0;
-            error =
-                clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE,
-                                sizeof(max_queue_size), &max_queue_size, NULL);
-            test_error(error,
-                       "clGetDeviceInfo for "
-                       "CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE failed");
-            if (requested_size > max_queue_size)
-            {
-                log_info(
-                    "The value of CL_QUEUE_SIZE = %d cannot be bigger than "
-                    "CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE = %d, skipped\n",
-                    requested_size, max_queue_size);
-                return TEST_SKIPPED_ITSELF;
-            }
-        }
-    }
-    cl_command_queue_properties supported_properties = 0;
-    cl_command_queue_properties all_properties = 0;
-    std::vector<cl_command_queue_properties> all_properties_vector{
-    };
-    for (auto each_property : all_properties_vector)
-    {
-        all_properties |= each_property;
-    }
-    cl_command_queue_properties requested_properties =
-        all_properties & requested_bitfield;
-    if (on_host_queue)
-    {
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
-                                sizeof(supported_properties),
-                                &supported_properties, NULL);
-        test_error(error,
-                   "clGetDeviceInfo asking for "
-                   "CL_DEVICE_QUEUE_ON_HOST_PROPERTIES failed");
-    }
-    else
-    {
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES,
-                                sizeof(supported_properties),
-                                &supported_properties, NULL);
-        test_error(error,
-                   "clGetDeviceInfo asking for "
-                   "CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES failed");
-    }
-    for (auto each_property : all_properties_vector)
-    {
-        if ((each_property & requested_properties)
-            && !(each_property & supported_properties))
-        {
-            log_info("\t%s not supported, skipped\n",
-                     GetQueuePropertyName(each_property));
-            return TEST_SKIPPED_ITSELF;
-        }
-        else if ((each_property & requested_properties)
-                 && each_property & supported_properties)
-        {
-            log_info("\t%s supported\n", GetQueuePropertyName(each_property));
-        }
-    }
-    return error;
-static int create_queue_and_check_array_properties(
-    cl_context context, cl_device_id deviceID,
-    test_queue_array_properties_data test_case)
-    cl_int error = CL_SUCCESS;
-    clCommandQueueWrapper test_queue;
-    if ( > 0)
-    {
-        test_queue = clCreateCommandQueueWithProperties(
-            context, deviceID,, &error);
-        test_error(error, "clCreateCommandQueueWithProperties failed");
-    }
-    else
-    {
-        test_queue =
-            clCreateCommandQueueWithProperties(context, deviceID, NULL, &error);
-        test_error(error, "clCreateCommandQueueWithProperties failed");
-    }
-    std::vector<cl_queue_properties> check_properties;
-    size_t set_size = 0;
-    error = clGetCommandQueueInfo(test_queue, CL_QUEUE_PROPERTIES_ARRAY, 0,
-                                  NULL, &set_size);
-    test_error(error,
-               "clGetCommandQueueInfo failed asking for "
-               "CL_QUEUE_PROPERTIES_ARRAY size.");
-    if (set_size == 0 && == 0)
-    {
-        return TEST_PASS;
-    }
-    if (set_size != * sizeof(cl_queue_properties))
-    {
-        log_error("ERROR: CL_QUEUE_PROPERTIES_ARRAY size is %d, expected %d.\n",
-                  set_size,
-         * sizeof(cl_queue_properties));
-        return TEST_FAIL;
-    }
-    cl_uint number_of_props = set_size / sizeof(cl_queue_properties);
-    check_properties.resize(number_of_props);
-    error = clGetCommandQueueInfo(test_queue, CL_QUEUE_PROPERTIES_ARRAY,
-                                  set_size,, NULL);
-    test_error(
-        error,
-        "clGetCommandQueueInfo failed asking for CL_QUEUE_PROPERTIES_ARRAY.");
-    error = compareProperties(check_properties,;
-    return error;
-static int
-run_test_queue_array_properties(cl_context context, cl_device_id deviceID,
-                                test_queue_array_properties_data test_case)
-    int error = TEST_PASS;
-    std::vector<cl_queue_properties> requested_properties =
-    log_info("\nTC description: %s\n", test_case.description.c_str());
-    // first verify if user properties are supported
-    if (requested_properties.size() != 0)
-    {
-        requested_properties.pop_back();
-        cl_command_queue_properties requested_bitfield = 0;
-        cl_uint requested_size = 0;
-        for (cl_uint i = 0; i < requested_properties.size(); i = i + 2)
-        {
-            if (requested_properties[i] == CL_QUEUE_PROPERTIES)
-            {
-                requested_bitfield = requested_properties[i + 1];
-            }
-            if (requested_properties[i] == CL_QUEUE_SIZE)
-            {
-                requested_size = requested_properties[i + 1];
-            }
-        }
-        error = verify_if_properties_supported(deviceID, requested_bitfield,
-                                               requested_size);
-        if (error == TEST_SKIPPED_ITSELF)
-        {
-            log_info("TC result: skipped\n");
-            return TEST_PASS;
-        }
-        test_error(error,
-                   "Checking which queue properties supported failed.\n");
-    }
-    // continue testing if supported user properties
-    error =
-        create_queue_and_check_array_properties(context, deviceID, test_case);
-    test_error(error, "create_queue_and_check_array_properties failed.\n");
-    log_info("TC result: passed\n");
-    return TEST_PASS;
-int test_queue_properties_queries(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements)
-    int error = TEST_PASS;
-    std::vector<test_queue_array_properties_data> test_cases;
-    test_cases.push_back({ {}, "host queue, NULL properties" });
-    test_cases.push_back(
-        { { CL_QUEUE_PROPERTIES, 0, 0 }, "host queue, zero properties" });
-    test_cases.push_back(
-          "host queue, CL_QUEUE_PROFILING_ENABLE" });
-    test_cases.push_back(
-          "host queue, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE" });
-    test_cases.push_back(
-            0 },
-          "host queue, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | "
-    test_cases.push_back(
-          "device queue, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | "
-          "CL_QUEUE_ON_DEVICE" });
-    test_cases.push_back(
-            CL_QUEUE_SIZE, 124, 0 },
-          "device queue, all possible properties" });
-    test_cases.push_back(
-                | CL_QUEUE_PROFILING_ENABLE,
-            CL_QUEUE_SIZE, 124, 0 },
-          "device queue, all without CL_QUEUE_ON_DEVICE_DEFAULT" });
-    test_cases.push_back(
-            0 },
-          "device queue, all without CL_QUEUE_SIZE" });
-    for (auto test_case : test_cases)
-    {
-        error |= run_test_queue_array_properties(context, deviceID, test_case);
-    }
-    return error;
diff --git a/test_conformance/api/test_retain.cpp b/test_conformance/api/test_retain.cpp
index 6e66c7d..cf065bc 100644
--- a/test_conformance/api/test_retain.cpp
+++ b/test_conformance/api/test_retain.cpp
@@ -251,9 +251,11 @@
     err = clSetMemObjectDestructorCallback( buffer, callback, nullptr );
     test_error( err, "Unable to set destructor callback" );
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      testProgram, "sample_test");
-    test_error(err, "Unable to build sample program and sample_test kernel");
+    err = create_single_kernel_helper( context, &program, nullptr, 1, testProgram, nullptr );
+    test_error( err, "Unable to build sample program" );
+    kernel = clCreateKernel( program, "sample_test", &err );
+    test_error( err, "Unable to create sample_test kernel" );
     err = clSetKernelArg( kernel, 0, sizeof(cl_mem), &buffer );
     test_error( err, "Unable to set kernel argument" );
diff --git a/test_conformance/api/test_retain_program.cpp b/test_conformance/api/test_retain_program.cpp
index b9fc8b7..a85bc70 100644
--- a/test_conformance/api/test_retain_program.cpp
+++ b/test_conformance/api/test_retain_program.cpp
@@ -28,11 +28,14 @@
     int error;
     const char *testProgram[] = { "__kernel void sample_test(__global int *data){}" };
-    /* Create a test program and kernel from it */
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        testProgram, "sample_test");
+    /* Create a test program */
+    error = create_single_kernel_helper(context, &program, NULL, 1, testProgram, NULL);
     test_error( error, "Unable to build sample program to test with" );
+    /* And create a kernel from it */
+    kernel = clCreateKernel( program, "sample_test", &error );
+    test_error( error, "Unable to create kernel" );
     /* Now try freeing the program first, then the kernel. If refcounts are right, this should work just fine */
     clReleaseProgram( program );
     clReleaseKernel( kernel );
@@ -65,11 +68,9 @@
         return -1;
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * 10, NULL, &error);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 10, NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * 10,
-                                NULL, &error);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 10, NULL, &error);
     test_error( error, "Creating test array failed" );
     /* Set the arguments */
diff --git a/test_conformance/api/test_sub_group_dispatch.cpp b/test_conformance/api/test_sub_group_dispatch.cpp
index 01d0ffa..c0dc372 100644
--- a/test_conformance/api/test_sub_group_dispatch.cpp
+++ b/test_conformance/api/test_sub_group_dispatch.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -75,29 +75,7 @@
     size_t ret_ndrange2d_flattened;
     size_t ret_ndrange3d_flattened;
-    if (get_device_cl_version(deviceID) >= Version(3, 0))
-    {
-        int error;
-        cl_uint max_num_sub_groups;
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_NUM_SUB_GROUPS,
-                                sizeof(max_num_sub_groups), &max_num_sub_groups,
-                                NULL);
-        if (error != CL_SUCCESS)
-        {
-            print_error(error, "Unable to get max num subgroups");
-            return error;
-        }
-        if (max_num_sub_groups == 0)
-        {
-            return TEST_SKIPPED_ITSELF;
-        }
-    }
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        subgroup_dispatch_kernel,
-                                        "subgroup_dispatch_kernel");
+    error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, subgroup_dispatch_kernel, "subgroup_dispatch_kernel", "-cl-std=CL2.0");
     if (error != 0)
         return error;
diff --git a/test_conformance/api/test_zero_sized_enqueue.cpp b/test_conformance/api/test_zero_sized_enqueue.cpp
index 7efb32c..dabe75f 100644
--- a/test_conformance/api/test_zero_sized_enqueue.cpp
+++ b/test_conformance/api/test_zero_sized_enqueue.cpp
@@ -17,15 +17,14 @@
 #include "harness/typeWrappers.h"
 #include "harness/conversions.h"
-const char* zero_sized_enqueue_test_kernel[] = {
-    "__kernel void foo_kernel(__global int *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = 1;\n"
-    "\n"
-    "}\n"
+const char *zero_sized_enqueue_test_kernel[] = {
+"__kernel void foo_kernel(__global float *src, __global int *dst)\n"
+"    int  tid = get_global_id(0);\n"
+"    dst[tid] = (int)src[tid];\n"
+"}\n" };
 const int bufSize = 128;
@@ -63,7 +62,7 @@
     int error;
     clProgramWrapper program;
     clKernelWrapper kernel;
-    clMemWrapper output_stream;
+    clMemWrapper            streams[2];
     size_t    ndrange1 = 0;
     size_t    ndrange20[2] = {0, 0};
     size_t    ndrange21[2] = {1, 0};
@@ -77,15 +76,15 @@
     size_t    ndrange35[3] = {1, 0, 1};
     size_t    ndrange36[3] = {1, 1, 0};
-    output_stream =
-        clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
-                       bufSize * sizeof(int), NULL, &error);
+    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, bufSize * sizeof(int), NULL, &error);
+    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, bufSize * sizeof(int), NULL, &error);
-    // Initialise output buffer.
-    int output_buffer_data = 0;
-    error = clEnqueueFillBuffer(queue, output_stream, &output_buffer_data,
-                                sizeof(int), 0, sizeof(int) * bufSize, 0, NULL,
-                                NULL);
+    int* buf = new int[bufSize];
+    memset(buf, 0, sizeof(int) * bufSize);
+    // update output buffer
+    error = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(int) * bufSize, buf, 0, NULL, NULL);
     /* Create a kernel to test with */
     if( create_single_kernel_helper( context, &program, &kernel, 1, zero_sized_enqueue_test_kernel, "foo_kernel" ) != 0 )
@@ -93,53 +92,44 @@
         return -1;
-    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &output_stream);
+    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &streams[0]);
+    test_error( error, "clSetKernelArg failed." );
+    error = clSetKernelArg(kernel, 1, sizeof(cl_mem), &streams[1]);
     test_error( error, "clSetKernelArg failed." );
     // Simple API return code tests for 1D, 2D and 3D zero sized ND range.
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 1, &ndrange1);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 1, &ndrange1);
     test_error( error, "1D zero sized kernel enqueue failed." );
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 2, ndrange20);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 2, ndrange20);
     test_error( error, "2D zero sized kernel enqueue failed." );
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 2, ndrange21);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 2, ndrange21);
     test_error( error, "2D zero sized kernel enqueue failed." );
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 2, ndrange22);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 2, ndrange22);
     test_error( error, "2D zero sized kernel enqueue failed." );
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 3, ndrange30);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange30);
     test_error( error, "3D zero sized kernel enqueue failed." );
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 3, ndrange31);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange31);
     test_error( error, "3D zero sized kernel enqueue failed." );
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 3, ndrange32);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange32);
     test_error( error, "3D zero sized kernel enqueue failed." );
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 3, ndrange33);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange33);
     test_error( error, "3D zero sized kernel enqueue failed." );
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 3, ndrange34);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange34);
     test_error( error, "3D zero sized kernel enqueue failed." );
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 3, ndrange35);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange35);
     test_error( error, "3D zero sized kernel enqueue failed." );
-    error = test_zero_sized_enqueue_and_test_output_buffer(
-        queue, kernel, output_stream, 3, ndrange36);
+    error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange36);
     test_error( error, "3D zero sized kernel enqueue failed." );
     // Verify zero-sized ND range kernel still satisfy event wait list and correct event object
@@ -159,7 +149,7 @@
     error = clGetEventInfo(ev, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &sta, NULL);
     test_error( error, "Failed to get event status.");
-    if (sta != CL_QUEUED && sta != CL_SUBMITTED)
+    if (sta != CL_QUEUED)
         log_error( "ERROR: incorrect zero sized kernel enqueue event status.\n" );
         return -1;
@@ -181,6 +171,8 @@
         return -1;
+    delete [] buf;
     return 0;
diff --git a/test_conformance/atomics/main.cpp b/test_conformance/atomics/main.cpp
index afdea37..6904d7c 100644
--- a/test_conformance/atomics/main.cpp
+++ b/test_conformance/atomics/main.cpp
@@ -45,6 +45,6 @@
 int main(int argc, const char *argv[])
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp
index 34b34ed..24bf974 100644
--- a/test_conformance/atomics/test_atomics.cpp
+++ b/test_conformance/atomics/test_atomics.cpp
@@ -243,17 +243,13 @@
     for( size_t i = 0; i < numDestItems; i++ )
         memcpy( destItems + i * typeSize, startValue, typeSize );
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                typeSize * numDestItems, destItems, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * numDestItems, destItems, NULL);
     if (!streams[0])
         log_error("ERROR: Creating output array failed!\n");
         return -1;
-    streams[1] = clCreateBuffer(
-        context,
-        ((startRefValues != NULL ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE)),
-        typeSize * threadSize, startRefValues, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(( startRefValues != NULL ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE )), typeSize * threadSize, startRefValues, NULL);
     if (!streams[1])
         log_error("ERROR: Creating reference array failed!\n");
@@ -1004,7 +1000,8 @@
     // Last item doesn't get and'ed on every bit, so we have to mask away
     size_t numBits = (size_t)size - whichResult * 64;
     cl_long bits = (cl_long)0xffffffffffffffffLL;
-    for (size_t i = 0; i < numBits; i++) bits &= ~(1LL << i);
+    for( size_t i = 0; i < numBits; i++ )
+        bits &= ~( 1 << i );
     return bits;
@@ -1085,16 +1082,18 @@
 #pragma mark ---- xor
 const char atom_xor_core[] =
-    "    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-    "    int  bitIndex = tid & ( numBits - 1 );\n"
-    "\n"
-    "    oldValues[tid] = atom_xor( &destMemory[0], 1L << bitIndex );\n";
+"    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+"    int  bitIndex = tid & ( numBits - 1 );\n"
+"    oldValues[tid] = atom_xor( &destMemory[0], 1 << bitIndex );\n"
 const char atomic_xor_core[] =
-    "    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-    "    int  bitIndex = tid & ( numBits - 1 );\n"
-    "\n"
-    "    oldValues[tid] = atomic_xor( &destMemory[0], 1L << bitIndex );\n";
+"    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+"    int  bitIndex = tid & ( numBits - 1 );\n"
+"    oldValues[tid] = atomic_xor( &destMemory[0], 1 << bitIndex );\n"
 cl_int test_atomic_xor_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp
index b85e3d2..9a27d07 100644
--- a/test_conformance/atomics/test_indexed_cases.cpp
+++ b/test_conformance/atomics/test_indexed_cases.cpp
@@ -64,12 +64,12 @@
              (int)numGlobalThreads, (int)numLocalThreads);
     // Create the counter that will keep track of where each thread writes.
-    counter = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * 1,
-                             NULL, NULL);
+    counter = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
+                                   sizeof(cl_int) * 1, NULL, NULL);
     // Create the counters that will hold the results of each thread writing
     // its ID into a (hopefully) unique location.
-    counters = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_int) * numGlobalThreads, NULL, NULL);
+    counters = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
+                                    sizeof(cl_int) * numGlobalThreads, NULL, NULL);
     // Reset all those locations to -1 to indciate they have not been used.
     cl_int *values = (cl_int*) malloc(sizeof(cl_int)*numGlobalThreads);
@@ -175,15 +175,12 @@
              (int)global_threads[0], (int)local_threads[0]);
     // Allocate our storage
-    cl_mem bin_counters =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_int) * number_of_bins, NULL, NULL);
-    cl_mem bins = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        sizeof(cl_int) * number_of_bins * max_counts_per_bin, NULL, NULL);
-    cl_mem bin_assignments =
-        clCreateBuffer(context, CL_MEM_READ_ONLY,
-                       sizeof(cl_int) * number_of_items, NULL, NULL);
+    cl_mem bin_counters = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
+                                        sizeof(cl_int) * number_of_bins, NULL, NULL);
+    cl_mem bins = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
+                                sizeof(cl_int) * number_of_bins*max_counts_per_bin, NULL, NULL);
+    cl_mem bin_assignments = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_ONLY),
+                                           sizeof(cl_int) * number_of_items, NULL, NULL);
     if (bin_counters == NULL) {
         log_error("add_index_bin_test FAILED to allocate bin_counters.\n");
diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt
index c5c4b5f..d73b84a 100644
--- a/test_conformance/basic/CMakeLists.txt
+++ b/test_conformance/basic/CMakeLists.txt
@@ -37,12 +37,8 @@
-    test_async_copy2D.cpp
-    test_async_copy3D.cpp
-    test_async_copy_fence.cpp	
-    test_vector_swizzle.cpp
diff --git a/test_conformance/basic/main.cpp b/test_conformance/basic/main.cpp
index 86c3cec..11ed2c3 100644
--- a/test_conformance/basic/main.cpp
+++ b/test_conformance/basic/main.cpp
@@ -26,147 +26,136 @@
 #include "procs.h"
 test_definition test_list[] = {
-    ADD_TEST(hostptr),
-    ADD_TEST(fpmath_float),
-    ADD_TEST(fpmath_float2),
-    ADD_TEST(fpmath_float4),
-    ADD_TEST(intmath_int),
-    ADD_TEST(intmath_int2),
-    ADD_TEST(intmath_int4),
-    ADD_TEST(intmath_long),
-    ADD_TEST(intmath_long2),
-    ADD_TEST(intmath_long4),
-    ADD_TEST(hiloeo),
-    ADD_TEST(if),
-    ADD_TEST(sizeof),
-    ADD_TEST(loop),
-    ADD_TEST(pointer_cast),
-    ADD_TEST(local_arg_def),
-    ADD_TEST(local_kernel_def),
-    ADD_TEST(local_kernel_scope),
-    ADD_TEST(constant),
-    ADD_TEST(constant_source),
-    ADD_TEST(readimage),
-    ADD_TEST(readimage_int16),
-    ADD_TEST(readimage_fp32),
-    ADD_TEST(writeimage),
-    ADD_TEST(writeimage_int16),
-    ADD_TEST(writeimage_fp32),
-    ADD_TEST(mri_one),
+    ADD_TEST( hostptr ),
+    ADD_TEST( fpmath_float ),
+    ADD_TEST( fpmath_float2 ),
+    ADD_TEST( fpmath_float4 ),
+    ADD_TEST( intmath_int ),
+    ADD_TEST( intmath_int2 ),
+    ADD_TEST( intmath_int4 ),
+    ADD_TEST( intmath_long ),
+    ADD_TEST( intmath_long2 ),
+    ADD_TEST( intmath_long4 ),
+    ADD_TEST( hiloeo ),
+    ADD_TEST( if ),
+    ADD_TEST( sizeof ),
+    ADD_TEST( loop ),
+    ADD_TEST( pointer_cast ),
+    ADD_TEST( local_arg_def ),
+    ADD_TEST( local_kernel_def ),
+    ADD_TEST( local_kernel_scope ),
+    ADD_TEST( constant ),
+    ADD_TEST( constant_source ),
+    ADD_TEST( readimage ),
+    ADD_TEST( readimage_int16 ),
+    ADD_TEST( readimage_fp32 ),
+    ADD_TEST( writeimage ),
+    ADD_TEST( writeimage_int16 ),
+    ADD_TEST( writeimage_fp32 ),
+    ADD_TEST( mri_one ),
-    ADD_TEST(mri_multiple),
-    ADD_TEST(image_r8),
-    ADD_TEST(barrier),
-    ADD_TEST_VERSION(wg_barrier, Version(2, 0)),
-    ADD_TEST(int2float),
-    ADD_TEST(float2int),
-    ADD_TEST(imagereadwrite),
-    ADD_TEST(imagereadwrite3d),
-    ADD_TEST(readimage3d),
-    ADD_TEST(readimage3d_int16),
-    ADD_TEST(readimage3d_fp32),
-    ADD_TEST(bufferreadwriterect),
-    ADD_TEST(arrayreadwrite),
-    ADD_TEST(arraycopy),
-    ADD_TEST(imagearraycopy),
-    ADD_TEST(imagearraycopy3d),
-    ADD_TEST(imagecopy),
-    ADD_TEST(imagecopy3d),
-    ADD_TEST(imagerandomcopy),
-    ADD_TEST(arrayimagecopy),
-    ADD_TEST(arrayimagecopy3d),
-    ADD_TEST(imagenpot),
+    ADD_TEST( mri_multiple ),
+    ADD_TEST( image_r8 ),
+    ADD_TEST( barrier ),
+    ADD_TEST_VERSION( wg_barrier, Version(2, 0) ),
+    ADD_TEST( int2float ),
+    ADD_TEST( float2int ),
+    ADD_TEST( imagereadwrite ),
+    ADD_TEST( imagereadwrite3d ),
+    ADD_TEST( readimage3d ),
+    ADD_TEST( readimage3d_int16 ),
+    ADD_TEST( readimage3d_fp32 ),
+    ADD_TEST( bufferreadwriterect ),
+    ADD_TEST( arrayreadwrite ),
+    ADD_TEST( arraycopy ),
+    ADD_TEST( imagearraycopy ),
+    ADD_TEST( imagearraycopy3d ),
+    ADD_TEST( imagecopy ),
+    ADD_TEST( imagecopy3d ),
+    ADD_TEST( imagerandomcopy ),
+    ADD_TEST( arrayimagecopy ),
+    ADD_TEST( arrayimagecopy3d ),
+    ADD_TEST( imagenpot ),
-    ADD_TEST(vload_global),
-    ADD_TEST(vload_local),
-    ADD_TEST(vload_constant),
-    ADD_TEST(vload_private),
-    ADD_TEST(vstore_global),
-    ADD_TEST(vstore_local),
-    ADD_TEST(vstore_private),
+    ADD_TEST( vload_global ),
+    ADD_TEST( vload_local ),
+    ADD_TEST( vload_constant ),
+    ADD_TEST( vload_private ),
+    ADD_TEST( vstore_global ),
+    ADD_TEST( vstore_local ),
+    ADD_TEST( vstore_private ),
-    ADD_TEST(createkernelsinprogram),
-    ADD_TEST(imagedim_pow2),
-    ADD_TEST(imagedim_non_pow2),
-    ADD_TEST(image_param),
-    ADD_TEST(image_multipass_integer_coord),
-    ADD_TEST(image_multipass_float_coord),
-    ADD_TEST(explicit_s2v_char),
-    ADD_TEST(explicit_s2v_uchar),
-    ADD_TEST(explicit_s2v_short),
-    ADD_TEST(explicit_s2v_ushort),
-    ADD_TEST(explicit_s2v_int),
-    ADD_TEST(explicit_s2v_uint),
-    ADD_TEST(explicit_s2v_long),
-    ADD_TEST(explicit_s2v_ulong),
-    ADD_TEST(explicit_s2v_float),
-    ADD_TEST(explicit_s2v_double),
+    ADD_TEST( createkernelsinprogram ),
+    ADD_TEST( imagedim_pow2 ),
+    ADD_TEST( imagedim_non_pow2 ),
+    ADD_TEST( image_param ),
+    ADD_TEST( image_multipass_integer_coord ),
+    ADD_TEST( image_multipass_float_coord ),
+    ADD_TEST( explicit_s2v_char ),
+    ADD_TEST( explicit_s2v_uchar ),
+    ADD_TEST( explicit_s2v_short ),
+    ADD_TEST( explicit_s2v_ushort ),
+    ADD_TEST( explicit_s2v_int ),
+    ADD_TEST( explicit_s2v_uint ),
+    ADD_TEST( explicit_s2v_long ),
+    ADD_TEST( explicit_s2v_ulong ),
+    ADD_TEST( explicit_s2v_float ),
+    ADD_TEST( explicit_s2v_double ),
-    ADD_TEST(enqueue_map_buffer),
-    ADD_TEST(enqueue_map_image),
+    ADD_TEST( enqueue_map_buffer ),
+    ADD_TEST( enqueue_map_image ),
-    ADD_TEST(work_item_functions),
+    ADD_TEST( work_item_functions ),
-    ADD_TEST(astype),
+    ADD_TEST( astype ),
-    ADD_TEST(async_copy_global_to_local),
-    ADD_TEST(async_copy_local_to_global),
-    ADD_TEST(async_strided_copy_global_to_local),
-    ADD_TEST(async_strided_copy_local_to_global),
-    ADD_TEST(async_copy_global_to_local2D),
-    ADD_TEST(async_copy_local_to_global2D),
-    ADD_TEST(async_copy_global_to_local3D),
-    ADD_TEST(async_copy_local_to_global3D),
-    ADD_TEST(async_work_group_copy_fence_import_after_export_aliased_local),
-    ADD_TEST(async_work_group_copy_fence_import_after_export_aliased_global),
-    ADD_TEST(
-        async_work_group_copy_fence_import_after_export_aliased_global_and_local),
-    ADD_TEST(async_work_group_copy_fence_export_after_import_aliased_local),
-    ADD_TEST(async_work_group_copy_fence_export_after_import_aliased_global),
-    ADD_TEST(
-        async_work_group_copy_fence_export_after_import_aliased_global_and_local),
-    ADD_TEST(prefetch),
-    ADD_TEST(kernel_call_kernel_function),
-    ADD_TEST(host_numeric_constants),
-    ADD_TEST(kernel_numeric_constants),
-    ADD_TEST(kernel_limit_constants),
-    ADD_TEST(kernel_preprocessor_macros),
-    ADD_TEST(parameter_types),
-    ADD_TEST(vector_creation),
-    ADD_TEST(vector_swizzle),
-    ADD_TEST(vec_type_hint),
-    ADD_TEST(kernel_memory_alignment_local),
-    ADD_TEST(kernel_memory_alignment_global),
-    ADD_TEST(kernel_memory_alignment_constant),
-    ADD_TEST(kernel_memory_alignment_private),
+    ADD_TEST( async_copy_global_to_local ),
+    ADD_TEST( async_copy_local_to_global ),
+    ADD_TEST( async_strided_copy_global_to_local ),
+    ADD_TEST( async_strided_copy_local_to_global ),
+    ADD_TEST( prefetch ),
-    ADD_TEST_VERSION(progvar_prog_scope_misc, Version(2, 0)),
-    ADD_TEST_VERSION(progvar_prog_scope_uninit, Version(2, 0)),
-    ADD_TEST_VERSION(progvar_prog_scope_init, Version(2, 0)),
-    ADD_TEST_VERSION(progvar_func_scope, Version(2, 0)),
+    ADD_TEST( kernel_call_kernel_function ),
+    ADD_TEST( host_numeric_constants ),
+    ADD_TEST( kernel_numeric_constants ),
+    ADD_TEST( kernel_limit_constants ),
+    ADD_TEST( kernel_preprocessor_macros ),
-    ADD_TEST(global_work_offsets),
-    ADD_TEST(get_global_offset),
+    ADD_TEST( parameter_types ),
+    ADD_TEST( vector_creation ),
+    ADD_TEST( vec_type_hint ),
+    ADD_TEST( kernel_memory_alignment_local ),
+    ADD_TEST( kernel_memory_alignment_global ),
+    ADD_TEST( kernel_memory_alignment_constant ),
+    ADD_TEST( kernel_memory_alignment_private ),
-    ADD_TEST_VERSION(global_linear_id, Version(2, 0)),
-    ADD_TEST_VERSION(local_linear_id, Version(2, 0)),
-    ADD_TEST_VERSION(enqueued_local_size, Version(2, 0)),
+    ADD_TEST_VERSION( progvar_prog_scope_misc, Version(2, 0) ),
+    ADD_TEST_VERSION( progvar_prog_scope_uninit, Version(2, 0) ),
+    ADD_TEST_VERSION( progvar_prog_scope_init, Version(2, 0) ),
+    ADD_TEST_VERSION( progvar_func_scope, Version(2, 0) ),
-    ADD_TEST(simple_read_image_pitch),
-    ADD_TEST(simple_write_image_pitch),
+    ADD_TEST( global_work_offsets ),
+    ADD_TEST( get_global_offset ),
+    ADD_TEST_VERSION( global_linear_id, Version(2, 0) ),
+    ADD_TEST_VERSION( local_linear_id, Version(2, 0) ),
+    ADD_TEST_VERSION( enqueued_local_size, Version(2, 0) ),
+    ADD_TEST( simple_read_image_pitch ),
+    ADD_TEST( simple_write_image_pitch ),
 #if defined( __APPLE__ )
-    ADD_TEST(queue_priority),
+    ADD_TEST( queue_priority ),
-    ADD_TEST_VERSION(get_linear_ids, Version(2, 0)),
-    ADD_TEST_VERSION(rw_image_access_qualifier, Version(2, 0)),
+    ADD_TEST_VERSION( get_linear_ids, Version(2, 0) ),
+    ADD_TEST_VERSION( rw_image_access_qualifier, Version(2, 0) ),
 const int test_num = ARRAY_SIZE( test_list );
 int main(int argc, const char *argv[])
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
diff --git a/test_conformance/basic/procs.h b/test_conformance/basic/procs.h
index 4a01a8c..9fe17ef 100644
--- a/test_conformance/basic/procs.h
+++ b/test_conformance/basic/procs.h
@@ -115,42 +115,6 @@
 extern int      test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_async_copy_global_to_local2D(cl_device_id deviceID,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements);
-extern int test_async_copy_local_to_global2D(cl_device_id deviceID,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements);
-extern int test_async_copy_global_to_local3D(cl_device_id deviceID,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements);
-extern int test_async_copy_local_to_global3D(cl_device_id deviceID,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements);
-extern int test_async_work_group_copy_fence_import_after_export_aliased_local(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_async_work_group_copy_fence_import_after_export_aliased_global(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_async_work_group_copy_fence_export_after_import_aliased_local(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_async_work_group_copy_fence_export_after_import_aliased_global(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
 extern int      test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
@@ -162,12 +126,8 @@
 extern int      test_kernel_call_kernel_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_vector_creation(cl_device_id deviceID, cl_context context,
-                                cl_command_queue queue, int num_elements);
-extern int test_vector_swizzle(cl_device_id deviceID, cl_context context,
-                               cl_command_queue queue, int num_elements);
-extern int test_vec_type_hint(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements);
+extern int      test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
diff --git a/test_conformance/basic/test_arraycopy.cpp b/test_conformance/basic/test_arraycopy.cpp
index 5a35286..e0cb565 100644
--- a/test_conformance/basic/test_arraycopy.cpp
+++ b/test_conformance/basic/test_arraycopy.cpp
@@ -51,8 +51,7 @@
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     // results
-    results = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                             sizeof(cl_uint) * num_elements, NULL, &err);
+    results = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, &err);
     test_error(err, "clCreateBuffer failed");
@@ -65,9 +64,7 @@
         input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
     // client backing
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
-                       sizeof(cl_uint) * num_elements, input_ptr, &err);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
     test_error(err, "clCreateBuffer failed");
     delta_offset = num_elements * sizeof(cl_uint) / num_copies;
@@ -106,8 +103,7 @@
         input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
     // no backing
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, &err);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE) , sizeof(cl_uint) * num_elements, NULL, &err);
     test_error(err, "clCreateBuffer failed");
     for (i=0; i<num_copies; i++)
@@ -150,20 +146,17 @@
     free_mtdata(d); d= NULL;
     // client backing
-    streams[3] =
-        clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
-                       sizeof(cl_uint) * num_elements, input_ptr, &err);
-    test_error(err, "clCreateBuffer failed");
+  streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
+  test_error(err, "clCreateBuffer failed");
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &copy_kernel_code, "test_copy");
-    test_error(err, "create_single_kernel_helper failed");
+  err = create_single_kernel_helper(context, &program, &kernel, 1, &copy_kernel_code, "test_copy" );
+  test_error(err, "create_single_kernel_helper failed");
-    err = clSetKernelArg(kernel, 0, sizeof streams[3], &streams[3]);
-    err |= clSetKernelArg(kernel, 1, sizeof results, &results);
-    test_error(err, "clSetKernelArg failed");
+  err = clSetKernelArg(kernel, 0, sizeof streams[3], &streams[3]);
+  err |= clSetKernelArg(kernel, 1, sizeof results, &results);
+  test_error(err, "clSetKernelArg failed");
-    size_t threads[3] = { num_elements, 0, 0 };
+  size_t threads[3] = {num_elements, 0, 0};
     err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
   test_error(err, "clEnqueueNDRangeKernel failed");
diff --git a/test_conformance/basic/test_arrayimagecopy.cpp b/test_conformance/basic/test_arrayimagecopy.cpp
index 5de5d01..5a0263f 100644
--- a/test_conformance/basic/test_arrayimagecopy.cpp
+++ b/test_conformance/basic/test_arrayimagecopy.cpp
@@ -38,8 +38,7 @@
   log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
-  image = create_image_2d(context, CL_MEM_READ_WRITE, format, img_width,
-                          img_height, 0, NULL, &err);
+  image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err);
   test_error(err, "create_image_2d failed");
   err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
@@ -47,7 +46,7 @@
   buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;
-  buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, &err);
+  buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  buffer_size, NULL, &err);
   test_error(err, "clCreateBuffer failed");
   d = init_genrand( gRandomSeed );
diff --git a/test_conformance/basic/test_arrayimagecopy3d.cpp b/test_conformance/basic/test_arrayimagecopy3d.cpp
index 1b08ec9..d1d3652 100644
--- a/test_conformance/basic/test_arrayimagecopy3d.cpp
+++ b/test_conformance/basic/test_arrayimagecopy3d.cpp
@@ -39,8 +39,7 @@
   log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
-  image = create_image_3d(context, CL_MEM_READ_ONLY, format, img_width,
-                          img_height, img_depth, 0, 0, NULL, &err);
+  image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err);
   test_error(err, "create_image_3d failed");
   err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
@@ -48,7 +47,7 @@
   buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
-  buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, &err);
+  buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  buffer_size, NULL, &err);
   test_error(err, "clCreateBuffer failed");
   d = init_genrand( gRandomSeed );
@@ -126,15 +125,12 @@
-  err = clGetSupportedImageFormats(
-      context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
+  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
   test_error(err, "clGetSupportedImageFormats failed");
   formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
-  err = clGetSupportedImageFormats(context, CL_MEM_READ_ONLY,
-                                   CL_MEM_OBJECT_IMAGE3D, num_formats, formats,
-                                   NULL);
+  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL);
   test_error(err, "clGetSupportedImageFormats failed");
   for (i = 0; i < num_formats; i++) {
diff --git a/test_conformance/basic/test_arrayreadwrite.cpp b/test_conformance/basic/test_arrayreadwrite.cpp
index 25e8ed9..6866439 100644
--- a/test_conformance/basic/test_arrayreadwrite.cpp
+++ b/test_conformance/basic/test_arrayreadwrite.cpp
@@ -43,8 +43,7 @@
     for (i=0; i<num_elements; i++)
         inptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, &err);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, &err);
     test_error(err, "clCreateBuffer failed");
     for (i=0; i<num_tries; i++)
diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp
deleted file mode 100644
index 9fbdcb6..0000000
--- a/test_conformance/basic/test_async_copy2D.cpp
+++ /dev/null
@@ -1,449 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "../../test_common/harness/compat.h"
-#include <algorithm>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include "../../test_common/harness/conversions.h"
-#include "procs.h"
-static const char *async_global_to_local_kernel2D =
-    "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
-    "%s *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int "
-    "lineCopiesPerWorkItem, int srcStride, int dstStride )\n"
-    "{\n"
-    " int i, j;\n"
-    // Zero the local storage first
-    " for(i=0; i<lineCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numElementsPerLine; j++)\n"
-    "     localBuffer[ (get_local_id( 0 "
-    ")*lineCopiesPerWorkItem+i)*(numElementsPerLine + dstStride)+j ] = "
-    "(%s)(%s)0;\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the copy
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t event;\n"
-    "    event = async_work_group_copy_2D2D( (__local %s*)localBuffer, "
-    "(__global const "
-    "%s*)(src+lineCopiesPerWorkgroup*get_group_id(0)*(numElementsPerLine + "
-    "srcStride)), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, "
-    "srcStride, dstStride, 0 );\n"
-    // Wait for the copy to complete, then verify by manually copying to the
-    // dest
-    "     wait_group_events( 1, &event );\n"
-    " for(i=0; i<lineCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numElementsPerLine; j++)\n"
-    "     dst[ (get_global_id( 0 "
-    ")*lineCopiesPerWorkItem+i)*(numElementsPerLine + dstStride)+j ] = "
-    "localBuffer[ (get_local_id( 0 "
-    ")*lineCopiesPerWorkItem+i)*(numElementsPerLine + dstStride)+j ];\n"
-    "}\n";
-static const char *async_local_to_global_kernel2D =
-    "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
-    "%s *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int "
-    "lineCopiesPerWorkItem, int srcStride, int dstStride )\n"
-    "{\n"
-    " int i, j;\n"
-    // Zero the local storage first
-    " for(i=0; i<lineCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numElementsPerLine; j++)\n"
-    "     localBuffer[ (get_local_id( 0 "
-    ")*lineCopiesPerWorkItem+i)*(numElementsPerLine + srcStride)+j ] = "
-    "(%s)(%s)0;\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the copy
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    " for(i=0; i<lineCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numElementsPerLine; j++)\n"
-    "     localBuffer[ (get_local_id( 0 "
-    ")*lineCopiesPerWorkItem+i)*(numElementsPerLine + srcStride)+j ] = src[ "
-    "(get_global_id( 0 )*lineCopiesPerWorkItem+i)*(numElementsPerLine + "
-    "srcStride)+j ];\n"
-    // Do this to verify all kernels are done copying to the local buffer before
-    // we try the copy
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t event;\n"
-    "    event = async_work_group_copy_2D2D((__global "
-    "%s*)(dst+lineCopiesPerWorkgroup*get_group_id(0)*(numElementsPerLine + "
-    "dstStride)), (__local const %s*)localBuffer, (size_t)numElementsPerLine, "
-    "(size_t)lineCopiesPerWorkgroup, srcStride, dstStride, 0 );\n"
-    "    wait_group_events( 1, &event );\n"
-    "}\n";
-int test_copy2D(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, const char *kernelCode,
-                ExplicitType vecType, int vecSize, int srcStride, int dstStride,
-                bool localIsDst)
-    int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[2];
-    size_t threads[1], localThreads[1];
-    void *inBuffer, *outBuffer, *outBufferCopy;
-    MTdata d;
-    char vecNameString[64];
-    vecNameString[0] = 0;
-    if (vecSize == 1)
-        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
-    else
-        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType),
-                vecSize);
-    size_t elementSize = get_explicit_type_size(vecType) * vecSize;
-    log_info("Testing %s with srcStride = %d, dstStride = %d\n", vecNameString,
-             srcStride, dstStride);
-    cl_long max_local_mem_size;
-    error =
-        clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE,
-                        sizeof(max_local_mem_size), &max_local_mem_size, NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");
-    cl_long max_global_mem_size;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE,
-                            sizeof(max_global_mem_size), &max_global_mem_size,
-                            NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed.");
-    cl_long max_alloc_size;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
-                            sizeof(max_alloc_size), &max_alloc_size, NULL);
-    test_error(error,
-               "clGetDeviceInfo for CL_DEVICE_MAX_MEM_ALLOC_SIZE failed.");
-    if (max_alloc_size > max_global_mem_size / 2)
-        max_alloc_size = max_global_mem_size / 2;
-    unsigned int num_of_compute_devices;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS,
-                            sizeof(num_of_compute_devices),
-                            &num_of_compute_devices, NULL);
-    test_error(error,
-               "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
-    char programSource[4096];
-    programSource[0] = 0;
-    char *programPtr;
-    sprintf(programSource, kernelCode,
-            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
-                               : "",
-            vecNameString, vecNameString, vecNameString, vecNameString,
-            get_explicit_type_name(vecType), vecNameString, vecNameString);
-    // log_info("program: %s\n", programSource);
-    programPtr = programSource;
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        (const char **)&programPtr, "test_fn");
-    test_error(error, "Unable to create testing kernel");
-    size_t max_workgroup_size;
-    error = clGetKernelWorkGroupInfo(
-        kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size),
-        &max_workgroup_size, NULL);
-    test_error(
-        error,
-        "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");
-    size_t max_local_workgroup_size[3];
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES,
-                            sizeof(max_local_workgroup_size),
-                            max_local_workgroup_size, NULL);
-    test_error(error,
-               "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
-    // Pick the minimum of the device and the kernel
-    if (max_workgroup_size > max_local_workgroup_size[0])
-        max_workgroup_size = max_local_workgroup_size[0];
-    size_t numElementsPerLine = 10;
-    size_t lineCopiesPerWorkItem = 13;
-    elementSize =
-        get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
-    size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem * elementSize
-        * (numElementsPerLine + (localIsDst ? dstStride : srcStride));
-    size_t maxLocalWorkgroupSize =
-        (((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem);
-    // Calculation can return 0 on embedded devices due to 1KB local mem limit
-    if (maxLocalWorkgroupSize == 0)
-    {
-        maxLocalWorkgroupSize = 1;
-    }
-    size_t localWorkgroupSize = maxLocalWorkgroupSize;
-    if (maxLocalWorkgroupSize > max_workgroup_size)
-        localWorkgroupSize = max_workgroup_size;
-    size_t maxTotalLinesIn = (max_alloc_size / elementSize + srcStride)
-        / (numElementsPerLine + srcStride);
-    size_t maxTotalLinesOut = (max_alloc_size / elementSize + dstStride)
-        / (numElementsPerLine + dstStride);
-    size_t maxTotalLines = (std::min)(maxTotalLinesIn, maxTotalLinesOut);
-    size_t maxLocalWorkgroups =
-        maxTotalLines / (localWorkgroupSize * lineCopiesPerWorkItem);
-    size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem
-        - (localIsDst ? dstStride : srcStride);
-    size_t numberOfLocalWorkgroups = (std::min)(1111, (int)maxLocalWorkgroups);
-    size_t totalLines =
-        numberOfLocalWorkgroups * localWorkgroupSize * lineCopiesPerWorkItem;
-    size_t inBufferSize = elementSize
-        * (totalLines * numElementsPerLine + (totalLines - 1) * srcStride);
-    size_t outBufferSize = elementSize
-        * (totalLines * numElementsPerLine + (totalLines - 1) * dstStride);
-    size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize;
-    inBuffer = (void *)malloc(inBufferSize);
-    outBuffer = (void *)malloc(outBufferSize);
-    outBufferCopy = (void *)malloc(outBufferSize);
-    cl_int lineCopiesPerWorkItemInt, numElementsPerLineInt,
-        lineCopiesPerWorkgroup;
-    lineCopiesPerWorkItemInt = (int)lineCopiesPerWorkItem;
-    numElementsPerLineInt = (int)numElementsPerLine;
-    lineCopiesPerWorkgroup = (int)(lineCopiesPerWorkItem * localWorkgroupSize);
-    log_info(
-        "Global: %d, local %d, local buffer %db, global in buffer %db, "
-        "global out buffer %db, each work group will copy %d lines and each "
-        "work item item will copy %d lines.\n",
-        (int)globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize,
-        (int)inBufferSize, (int)outBufferSize, lineCopiesPerWorkgroup,
-        lineCopiesPerWorkItemInt);
-    threads[0] = globalWorkgroupSize;
-    localThreads[0] = localWorkgroupSize;
-    d = init_genrand(gRandomSeed);
-    generate_random_data(
-        vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer);
-    generate_random_data(
-        vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer);
-    free_mtdata(d);
-    d = NULL;
-    memcpy(outBufferCopy, outBuffer, outBufferSize);
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize,
-                                inBuffer, &error);
-    test_error(error, "Unable to create input buffer");
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, outBufferSize,
-                                outBuffer, &error);
-    test_error(error, "Unable to create output buffer");
-    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 2, localBufferSize, NULL);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 3, sizeof(numElementsPerLineInt),
-                           &numElementsPerLineInt);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 4, sizeof(lineCopiesPerWorkgroup),
-                           &lineCopiesPerWorkgroup);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 5, sizeof(lineCopiesPerWorkItemInt),
-                           &lineCopiesPerWorkItemInt);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 6, sizeof(srcStride), &srcStride);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 7, sizeof(dstStride), &dstStride);
-    test_error(error, "Unable to set kernel argument");
-    // Enqueue
-    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
-                                   localThreads, 0, NULL, NULL);
-    test_error(error, "Unable to queue kernel");
-    // Read
-    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, outBufferSize,
-                                outBuffer, 0, NULL, NULL);
-    test_error(error, "Unable to read results");
-    // Verify
-    int failuresPrinted = 0;
-    // Verify
-    size_t typeSize = get_explicit_type_size(vecType) * vecSize;
-    for (int i = 0;
-         i < (int)globalWorkgroupSize * lineCopiesPerWorkItem * elementSize;
-         i += elementSize)
-    {
-        for (int j = 0; j < (int)numElementsPerLine * elementSize;
-             j += elementSize)
-        {
-            int inIdx = i * (numElementsPerLine + srcStride) + j;
-            int outIdx = i * (numElementsPerLine + dstStride) + j;
-            if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx,
-                       typeSize)
-                != 0)
-            {
-                unsigned char *inchar = (unsigned char *)inBuffer + inIdx;
-                unsigned char *outchar = (unsigned char *)outBuffer + outIdx;
-                char values[4096];
-                values[0] = 0;
-                if (failuresPrinted == 0)
-                {
-                    // Print first failure message
-                    log_error("ERROR: Results of copy did not validate!\n");
-                }
-                sprintf(values + strlen(values), "%d -> [", inIdx);
-                for (int k = 0; k < (int)elementSize; k++)
-                    sprintf(values + strlen(values), "%2x ", inchar[k]);
-                sprintf(values + strlen(values), "] != [");
-                for (int k = 0; k < (int)elementSize; k++)
-                    sprintf(values + strlen(values), "%2x ", outchar[k]);
-                sprintf(values + strlen(values), "]");
-                log_error("%s\n", values);
-                failuresPrinted++;
-            }
-            if (failuresPrinted > 5)
-            {
-                log_error("Not printing further failures...\n");
-                return -1;
-            }
-        }
-        if (i < (int)(globalWorkgroupSize * lineCopiesPerWorkItem - 1)
-                * elementSize)
-        {
-            int outIdx = i * (numElementsPerLine + dstStride)
-                + numElementsPerLine * elementSize;
-            if (memcmp(((char *)outBuffer) + outIdx,
-                       ((char *)outBufferCopy) + outIdx,
-                       dstStride * elementSize)
-                != 0)
-            {
-                if (failuresPrinted == 0)
-                {
-                    // Print first failure message
-                    log_error("ERROR: Results of copy did not validate!\n");
-                }
-                log_error(
-                    "2D copy corrupted data in output buffer in the stride "
-                    "offset of line %d\n",
-                    i);
-                failuresPrinted++;
-            }
-            if (failuresPrinted > 5)
-            {
-                log_error("Not printing further failures...\n");
-                return -1;
-            }
-        }
-    }
-    free(inBuffer);
-    free(outBuffer);
-    free(outBufferCopy);
-    return failuresPrinted ? -1 : 0;
-int test_copy2D_all_types(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, const char *kernelCode,
-                          bool localIsDst)
-    ExplicitType vecType[] = {
-        kChar,  kUChar, kShort,  kUShort,          kInt, kUInt, kLong,
-        kULong, kFloat, kDouble, kNumExplicitTypes
-    };
-    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
-    unsigned int smallTypesStrideSizes[] = { 0, 10, 100 };
-    unsigned int size, typeIndex, srcStride, dstStride;
-    int errors = 0;
-    if (!is_extension_available(deviceID, "cl_khr_extended_async_copies"))
-    {
-        log_info(
-            "Device does not support extended async copies. Skipping test.\n");
-        return 0;
-    }
-    for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
-    {
-        if (vecType[typeIndex] == kDouble
-            && !is_extension_available(deviceID, "cl_khr_fp64"))
-            continue;
-        if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong)
-            && !gHasLong)
-            continue;
-        for (size = 0; vecSizes[size] != 0; size++)
-        {
-            if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
-                <= 2) // small type
-            {
-                for (srcStride = 0; srcStride < sizeof(smallTypesStrideSizes)
-                         / sizeof(smallTypesStrideSizes[0]);
-                     srcStride++)
-                {
-                    for (dstStride = 0;
-                         dstStride < sizeof(smallTypesStrideSizes)
-                             / sizeof(smallTypesStrideSizes[0]);
-                         dstStride++)
-                    {
-                        if (test_copy2D(deviceID, context, queue, kernelCode,
-                                        vecType[typeIndex], vecSizes[size],
-                                        smallTypesStrideSizes[srcStride],
-                                        smallTypesStrideSizes[dstStride],
-                                        localIsDst))
-                        {
-                            errors++;
-                        }
-                    }
-                }
-            }
-            // not a small type, check only zero stride
-            else if (test_copy2D(deviceID, context, queue, kernelCode,
-                                 vecType[typeIndex], vecSizes[size], 0, 0,
-                                 localIsDst))
-            {
-                errors++;
-            }
-        }
-    }
-    if (errors) return -1;
-    return 0;
-int test_async_copy_global_to_local2D(cl_device_id deviceID, cl_context context,
-                                      cl_command_queue queue, int num_elements)
-    return test_copy2D_all_types(deviceID, context, queue,
-                                 async_global_to_local_kernel2D, true);
-int test_async_copy_local_to_global2D(cl_device_id deviceID, cl_context context,
-                                      cl_command_queue queue, int num_elements)
-    return test_copy2D_all_types(deviceID, context, queue,
-                                 async_local_to_global_kernel2D, false);
diff --git a/test_conformance/basic/test_async_copy3D.cpp b/test_conformance/basic/test_async_copy3D.cpp
deleted file mode 100644
index 252159b..0000000
--- a/test_conformance/basic/test_async_copy3D.cpp
+++ /dev/null
@@ -1,546 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "../../test_common/harness/compat.h"
-#include <algorithm>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include "../../test_common/harness/conversions.h"
-#include "procs.h"
-static const char *async_global_to_local_kernel3D =
-    "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
-    "%s *localBuffer, int numElementsPerLine, int numLines, int "
-    "planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, "
-    "int dstLineStride, int srcPlaneStride, int dstPlaneStride )\n"
-    "{\n"
-    " int i, j, k;\n"
-    // Zero the local storage first
-    " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numLines; j++)\n"
-    "     for(k=0; k<numElementsPerLine; k++)\n"
-    "       localBuffer[ (get_local_id( 0 "
-    ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
-    "numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
-    "dstLineStride) + k ] = (%s)(%s)0;\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the copy
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t event;\n"
-    "    event = async_work_group_copy_3D3D( (__local %s*)localBuffer, "
-    "(__global const "
-    "%s*)(src+planesCopiesPerWorkgroup*get_group_id(0)*(numLines*"
-    "numElementsPerLine + numLines*srcLineStride + srcPlaneStride)), "
-    "(size_t)numElementsPerLine, (size_t)numLines, srcLineStride, "
-    "dstLineStride, planesCopiesPerWorkgroup, srcPlaneStride, dstPlaneStride, "
-    "0 );\n"
-    // Wait for the copy to complete, then verify by manually copying to the
-    // dest
-    " wait_group_events( 1, &event );\n"
-    " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numLines; j++)\n"
-    "     for(k=0; k<numElementsPerLine; k++)\n"
-    "       dst[ (get_global_id( 0 "
-    ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
-    "numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
-    "dstLineStride) + k ] = localBuffer[ (get_local_id( 0 "
-    ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
-    "numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
-    "dstLineStride) + k ];\n"
-    "}\n";
-static const char *async_local_to_global_kernel3D =
-    "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
-    "%s *localBuffer, int numElementsPerLine, int numLines, int "
-    "planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, "
-    "int dstLineStride, int srcPlaneStride, int dstPlaneStride )\n"
-    "{\n"
-    " int i, j, k;\n"
-    // Zero the local storage first
-    " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numLines; j++)\n"
-    "     for(k=0; k<numElementsPerLine; k++)\n"
-    "       localBuffer[ (get_local_id( 0 "
-    ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
-    "numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
-    "srcLineStride) + k ] = (%s)(%s)0;\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the copy
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
-    "   for(j=0; j<numLines; j++)\n"
-    "     for(k=0; k<numElementsPerLine; k++)\n"
-    "       localBuffer[ (get_local_id( 0 "
-    ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
-    "numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
-    "srcLineStride) + k ] = src[ (get_global_id( 0 "
-    ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
-    "numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
-    "srcLineStride) + k ];\n"
-    // Do this to verify all kernels are done copying to the local buffer before
-    // we try the copy
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t event;\n"
-    "    event = async_work_group_copy_3D3D((__global "
-    "%s*)(dst+planesCopiesPerWorkgroup*get_group_id(0)*(numLines*"
-    "numElementsPerLine + numLines*dstLineStride + dstPlaneStride)), (__local "
-    "const %s*)localBuffer, (size_t)numElementsPerLine, (size_t)numLines, "
-    "srcLineStride, dstLineStride, planesCopiesPerWorkgroup, srcPlaneStride, "
-    "dstPlaneStride, 0 );\n"
-    "    wait_group_events( 1, &event );\n"
-    "}\n";
-int test_copy3D(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, const char *kernelCode,
-                ExplicitType vecType, int vecSize, int srcLineStride,
-                int dstLineStride, int srcPlaneStride, int dstPlaneStride,
-                bool localIsDst)
-    int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[2];
-    size_t threads[1], localThreads[1];
-    void *inBuffer, *outBuffer, *outBufferCopy;
-    MTdata d;
-    char vecNameString[64];
-    vecNameString[0] = 0;
-    if (vecSize == 1)
-        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
-    else
-        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType),
-                vecSize);
-    size_t elementSize = get_explicit_type_size(vecType) * vecSize;
-    log_info("Testing %s with srcLineStride = %d, dstLineStride = %d, "
-             "srcPlaneStride = %d, dstPlaneStride = %d\n",
-             vecNameString, srcLineStride, dstLineStride, srcPlaneStride,
-             dstPlaneStride);
-    cl_long max_local_mem_size;
-    error =
-        clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE,
-                        sizeof(max_local_mem_size), &max_local_mem_size, NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");
-    cl_long max_global_mem_size;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE,
-                            sizeof(max_global_mem_size), &max_global_mem_size,
-                            NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed.");
-    cl_long max_alloc_size;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
-                            sizeof(max_alloc_size), &max_alloc_size, NULL);
-    test_error(error,
-               "clGetDeviceInfo for CL_DEVICE_MAX_MEM_ALLOC_SIZE failed.");
-    if (max_alloc_size > max_global_mem_size / 2)
-        max_alloc_size = max_global_mem_size / 2;
-    unsigned int num_of_compute_devices;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS,
-                            sizeof(num_of_compute_devices),
-                            &num_of_compute_devices, NULL);
-    test_error(error,
-               "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
-    char programSource[4096];
-    programSource[0] = 0;
-    char *programPtr;
-    sprintf(programSource, kernelCode,
-            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
-                               : "",
-            vecNameString, vecNameString, vecNameString, vecNameString,
-            get_explicit_type_name(vecType), vecNameString, vecNameString);
-    // log_info("program: %s\n", programSource);
-    programPtr = programSource;
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        (const char **)&programPtr, "test_fn");
-    test_error(error, "Unable to create testing kernel");
-    size_t max_workgroup_size;
-    error = clGetKernelWorkGroupInfo(
-        kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size),
-        &max_workgroup_size, NULL);
-    test_error(
-        error,
-        "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");
-    size_t max_local_workgroup_size[3];
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES,
-                            sizeof(max_local_workgroup_size),
-                            max_local_workgroup_size, NULL);
-    test_error(error,
-               "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
-    // Pick the minimum of the device and the kernel
-    if (max_workgroup_size > max_local_workgroup_size[0])
-        max_workgroup_size = max_local_workgroup_size[0];
-    size_t numElementsPerLine = 10;
-    size_t numLines = 13;
-    size_t planesCopiesPerWorkItem = 2;
-    elementSize =
-        get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
-    size_t localStorageSpacePerWorkitem = elementSize
-        * (planesCopiesPerWorkItem
-           * (numLines * numElementsPerLine
-              + numLines * (localIsDst ? dstLineStride : srcLineStride)
-              + (localIsDst ? dstPlaneStride : srcPlaneStride)));
-    size_t maxLocalWorkgroupSize =
-        (((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem);
-    // Calculation can return 0 on embedded devices due to 1KB local mem limit
-    if (maxLocalWorkgroupSize == 0)
-    {
-        maxLocalWorkgroupSize = 1;
-    }
-    size_t localWorkgroupSize = maxLocalWorkgroupSize;
-    if (maxLocalWorkgroupSize > max_workgroup_size)
-        localWorkgroupSize = max_workgroup_size;
-    size_t maxTotalPlanesIn = ((max_alloc_size / elementSize) + srcPlaneStride)
-        / ((numLines * numElementsPerLine + numLines * srcLineStride)
-           + srcPlaneStride);
-    size_t maxTotalPlanesOut = ((max_alloc_size / elementSize) + dstPlaneStride)
-        / ((numLines * numElementsPerLine + numLines * dstLineStride)
-           + dstPlaneStride);
-    size_t maxTotalPlanes = (std::min)(maxTotalPlanesIn, maxTotalPlanesOut);
-    size_t maxLocalWorkgroups =
-        maxTotalPlanes / (localWorkgroupSize * planesCopiesPerWorkItem);
-    size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem
-        - (localIsDst ? dstPlaneStride : srcPlaneStride);
-    size_t numberOfLocalWorkgroups = (std::min)(1111, (int)maxLocalWorkgroups);
-    size_t totalPlanes =
-        numberOfLocalWorkgroups * localWorkgroupSize * planesCopiesPerWorkItem;
-    size_t inBufferSize = elementSize
-        * (totalPlanes
-               * (numLines * numElementsPerLine + numLines * srcLineStride)
-           + (totalPlanes - 1) * srcPlaneStride);
-    size_t outBufferSize = elementSize
-        * (totalPlanes
-               * (numLines * numElementsPerLine + numLines * dstLineStride)
-           + (totalPlanes - 1) * dstPlaneStride);
-    size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize;
-    inBuffer = (void *)malloc(inBufferSize);
-    outBuffer = (void *)malloc(outBufferSize);
-    outBufferCopy = (void *)malloc(outBufferSize);
-    cl_int planesCopiesPerWorkItemInt, numElementsPerLineInt, numLinesInt,
-        planesCopiesPerWorkgroup;
-    planesCopiesPerWorkItemInt = (int)planesCopiesPerWorkItem;
-    numElementsPerLineInt = (int)numElementsPerLine;
-    numLinesInt = (int)numLines;
-    planesCopiesPerWorkgroup =
-        (int)(planesCopiesPerWorkItem * localWorkgroupSize);
-    log_info("Global: %d, local %d, local buffer %db, global in buffer %db, "
-             "global out buffer %db, each work group will copy %d planes and "
-             "each work item item will copy %d planes.\n",
-             (int)globalWorkgroupSize, (int)localWorkgroupSize,
-             (int)localBufferSize, (int)inBufferSize, (int)outBufferSize,
-             planesCopiesPerWorkgroup, planesCopiesPerWorkItemInt);
-    threads[0] = globalWorkgroupSize;
-    localThreads[0] = localWorkgroupSize;
-    d = init_genrand(gRandomSeed);
-    generate_random_data(
-        vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer);
-    generate_random_data(
-        vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer);
-    free_mtdata(d);
-    d = NULL;
-    memcpy(outBufferCopy, outBuffer, outBufferSize);
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize,
-                                inBuffer, &error);
-    test_error(error, "Unable to create input buffer");
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, outBufferSize,
-                                outBuffer, &error);
-    test_error(error, "Unable to create output buffer");
-    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 2, localBufferSize, NULL);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 3, sizeof(numElementsPerLineInt),
-                           &numElementsPerLineInt);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 4, sizeof(numLinesInt), &numLinesInt);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 5, sizeof(planesCopiesPerWorkgroup),
-                           &planesCopiesPerWorkgroup);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 6, sizeof(planesCopiesPerWorkItemInt),
-                           &planesCopiesPerWorkItemInt);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 7, sizeof(srcLineStride), &srcLineStride);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 8, sizeof(dstLineStride), &dstLineStride);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 9, sizeof(srcPlaneStride), &srcPlaneStride);
-    test_error(error, "Unable to set kernel argument");
-    error = clSetKernelArg(kernel, 10, sizeof(dstPlaneStride), &dstPlaneStride);
-    test_error(error, "Unable to set kernel argument");
-    // Enqueue
-    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
-                                   localThreads, 0, NULL, NULL);
-    test_error(error, "Unable to queue kernel");
-    // Read
-    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, outBufferSize,
-                                outBuffer, 0, NULL, NULL);
-    test_error(error, "Unable to read results");
-    // Verify
-    int failuresPrinted = 0;
-    // Verify
-    size_t typeSize = get_explicit_type_size(vecType) * vecSize;
-    for (int i = 0;
-         i < (int)globalWorkgroupSize * planesCopiesPerWorkItem * elementSize;
-         i += elementSize)
-    {
-        for (int j = 0; j < (int)numLines * elementSize; j += elementSize)
-        {
-            for (int k = 0; k < (int)numElementsPerLine * elementSize;
-                 k += elementSize)
-            {
-                int inIdx = i
-                        * (numLines * numElementsPerLine
-                           + numLines * srcLineStride + srcPlaneStride)
-                    + j * (numElementsPerLine + srcLineStride) + k;
-                int outIdx = i
-                        * (numLines * numElementsPerLine
-                           + numLines * dstLineStride + dstPlaneStride)
-                    + j * (numElementsPerLine + dstLineStride) + k;
-                if (memcmp(((char *)inBuffer) + inIdx,
-                           ((char *)outBuffer) + outIdx, typeSize)
-                    != 0)
-                {
-                    unsigned char *inchar = (unsigned char *)inBuffer + inIdx;
-                    unsigned char *outchar =
-                        (unsigned char *)outBuffer + outIdx;
-                    char values[4096];
-                    values[0] = 0;
-                    if (failuresPrinted == 0)
-                    {
-                        // Print first failure message
-                        log_error("ERROR: Results of copy did not validate!");
-                    }
-                    sprintf(values + strlen(values), "%d -> [", inIdx);
-                    for (int l = 0; l < (int)elementSize; l++)
-                        sprintf(values + strlen(values), "%2x ", inchar[l]);
-                    sprintf(values + strlen(values), "] != [");
-                    for (int l = 0; l < (int)elementSize; l++)
-                        sprintf(values + strlen(values), "%2x ", outchar[l]);
-                    sprintf(values + strlen(values), "]");
-                    log_error("%s\n", values);
-                    failuresPrinted++;
-                }
-                if (failuresPrinted > 5)
-                {
-                    log_error("Not printing further failures...\n");
-                    return -1;
-                }
-            }
-            if (j < (int)numLines * elementSize)
-            {
-                int outIdx = i
-                        * (numLines * numElementsPerLine
-                           + numLines * dstLineStride + dstPlaneStride)
-                    + j * (numElementsPerLine + dstLineStride)
-                    + numElementsPerLine * elementSize;
-                if (memcmp(((char *)outBuffer) + outIdx,
-                           ((char *)outBufferCopy) + outIdx,
-                           dstLineStride * elementSize)
-                    != 0)
-                {
-                    if (failuresPrinted == 0)
-                    {
-                        // Print first failure message
-                        log_error("ERROR: Results of copy did not validate!\n");
-                    }
-                    log_error(
-                        "3D copy corrupted data in output buffer in the line "
-                        "stride offset of plane %d line %d\n",
-                        i, j);
-                    failuresPrinted++;
-                }
-                if (failuresPrinted > 5)
-                {
-                    log_error("Not printing further failures...\n");
-                    return -1;
-                }
-            }
-        }
-        if (i < (int)(globalWorkgroupSize * planesCopiesPerWorkItem - 1)
-                * elementSize)
-        {
-            int outIdx = i
-                    * (numLines * numElementsPerLine + numLines * dstLineStride
-                       + dstPlaneStride)
-                + (numLines * elementSize) * (numElementsPerLine)
-                + (numLines * elementSize) * (dstLineStride);
-            if (memcmp(((char *)outBuffer) + outIdx,
-                       ((char *)outBufferCopy) + outIdx,
-                       dstPlaneStride * elementSize)
-                != 0)
-            {
-                if (failuresPrinted == 0)
-                {
-                    // Print first failure message
-                    log_error("ERROR: Results of copy did not validate!\n");
-                }
-                log_error("3D copy corrupted data in output buffer in the "
-                          "plane stride "
-                          "offset of plane %d\n",
-                          i);
-                failuresPrinted++;
-            }
-            if (failuresPrinted > 5)
-            {
-                log_error("Not printing further failures...\n");
-                return -1;
-            }
-        }
-    }
-    free(inBuffer);
-    free(outBuffer);
-    free(outBufferCopy);
-    return failuresPrinted ? -1 : 0;
-int test_copy3D_all_types(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, const char *kernelCode,
-                          bool localIsDst)
-    ExplicitType vecType[] = {
-        kChar,  kUChar, kShort,  kUShort,          kInt, kUInt, kLong,
-        kULong, kFloat, kDouble, kNumExplicitTypes
-    };
-    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
-    unsigned int smallTypesStrideSizes[] = { 0, 10, 100 };
-    unsigned int size, typeIndex, srcLineStride, dstLineStride, srcPlaneStride,
-        dstPlaneStride;
-    int errors = 0;
-    if (!is_extension_available(deviceID, "cl_khr_extended_async_copies"))
-    {
-        log_info(
-            "Device does not support extended async copies. Skipping test.\n");
-        return 0;
-    }
-    for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
-    {
-        if (vecType[typeIndex] == kDouble
-            && !is_extension_available(deviceID, "cl_khr_fp64"))
-            continue;
-        if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong)
-            && !gHasLong)
-            continue;
-        for (size = 0; vecSizes[size] != 0; size++)
-        {
-            if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
-                <= 2) // small type
-            {
-                for (srcLineStride = 0;
-                     srcLineStride < sizeof(smallTypesStrideSizes)
-                         / sizeof(smallTypesStrideSizes[0]);
-                     srcLineStride++)
-                {
-                    for (dstLineStride = 0;
-                         dstLineStride < sizeof(smallTypesStrideSizes)
-                             / sizeof(smallTypesStrideSizes[0]);
-                         dstLineStride++)
-                    {
-                        for (srcPlaneStride = 0;
-                             srcPlaneStride < sizeof(smallTypesStrideSizes)
-                                 / sizeof(smallTypesStrideSizes[0]);
-                             srcPlaneStride++)
-                        {
-                            for (dstPlaneStride = 0;
-                                 dstPlaneStride < sizeof(smallTypesStrideSizes)
-                                     / sizeof(smallTypesStrideSizes[0]);
-                                 dstPlaneStride++)
-                            {
-                                if (test_copy3D(
-                                        deviceID, context, queue, kernelCode,
-                                        vecType[typeIndex], vecSizes[size],
-                                        smallTypesStrideSizes[srcLineStride],
-                                        smallTypesStrideSizes[dstLineStride],
-                                        smallTypesStrideSizes[srcPlaneStride],
-                                        smallTypesStrideSizes[dstPlaneStride],
-                                        localIsDst))
-                                {
-                                    errors++;
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-            // not a small type, check only zero stride
-            else if (test_copy3D(deviceID, context, queue, kernelCode,
-                                 vecType[typeIndex], vecSizes[size], 0, 0, 0, 0,
-                                 localIsDst))
-            {
-                errors++;
-            }
-        }
-    }
-    if (errors) return -1;
-    return 0;
-int test_async_copy_global_to_local3D(cl_device_id deviceID, cl_context context,
-                                      cl_command_queue queue, int num_elements)
-    return test_copy3D_all_types(deviceID, context, queue,
-                                 async_global_to_local_kernel3D, true);
-int test_async_copy_local_to_global3D(cl_device_id deviceID, cl_context context,
-                                      cl_command_queue queue, int num_elements)
-    return test_copy3D_all_types(deviceID, context, queue,
-                                 async_local_to_global_kernel3D, false);
diff --git a/test_conformance/basic/test_async_copy_fence.cpp b/test_conformance/basic/test_async_copy_fence.cpp
deleted file mode 100644
index 43245da..0000000
--- a/test_conformance/basic/test_async_copy_fence.cpp
+++ /dev/null
@@ -1,812 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "../../test_common/harness/compat.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include "../../test_common/harness/conversions.h"
-#include "procs.h"
-static const char *import_after_export_aliased_local_kernel =
-    "#pragma OPENCL EXTENSION cl_khr_async_work_group_copy_fence : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *exportSrc, __global %s "
-    "*exportDst,\n"
-    "                       const __global %s *importSrc, __global %s "
-    "*importDst,\n"
-    "                       __local %s *localBuffer, /* there isn't another "
-    "__local %s local buffer since export src and import dst are aliased*/\n"
-    "                       int exportSrcLocalSize, int "
-    "exportCopiesPerWorkItem,\n"
-    "                       int importSrcLocalSize, int "
-    "importCopiesPerWorkItem )\n"
-    "{\n"
-    "    int i;\n"
-    "    int localImportOffset = exportSrcLocalSize - importSrcLocalSize;\n"
-    // Zero the local storage first
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        localBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] = "
-    "(%s)(%s)0;\n"
-    "    }\n"
-    "    // no need to set another local buffer values to (%s)(%s)0 since "
-    "export src and import dst are aliased (use the same buffer)\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the export and import
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        localBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] = "
-    "exportSrc[ get_global_id( 0 )*exportCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    // Do this to verify all kernels are done copying to the local buffer before
-    // we try the export and import
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t events;\n"
-    "    events = async_work_group_copy((__global "
-    "%s*)(exportDst+exportSrcLocalSize*get_group_id(0)), (__local const "
-    "%s*)localBuffer, (size_t)exportSrcLocalSize, 0 );\n"
-    "    async_work_group_copy_fence( CLK_LOCAL_MEM_FENCE );\n"
-    "    events = async_work_group_copy( (__local "
-    "%s*)(localBuffer+localImportOffset), (__global const "
-    "%s*)(importSrc+importSrcLocalSize*get_group_id(0)), "
-    "(size_t)importSrcLocalSize, events );\n"
-    // Wait for the export and import to complete, then verify by manually
-    // copying to the dest
-    "    wait_group_events( 2, &events );\n"
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importDst[ get_global_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "(localBuffer+localImportOffset)[ get_local_id( 0 "
-    ")*importCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    "}\n";
-static const char *import_after_export_aliased_global_kernel =
-    "#pragma OPENCL EXTENSION cl_khr_async_work_group_copy_fence : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *exportSrc, __global %s "
-    "*exportDstImportSrc,\n"
-    "                       __global %s *importDst, /* there isn't a dedicated "
-    "__global %s buffer for import src since export dst and import src are "
-    "aliased*/\n"
-    "                       __local %s *exportLocalBuffer, __local %s "
-    "*importLocalBuffer,\n"
-    "                       int exportSrcLocalSize, int "
-    "exportCopiesPerWorkItem,\n"
-    "                       int importSrcLocalSize, int "
-    "importCopiesPerWorkItem )\n"
-    "{\n"
-    "    int i;\n"
-    // Zero the local storage first
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        exportLocalBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] "
-    "= (%s)(%s)0;\n"
-    "    }\n"
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importLocalBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ] "
-    "= (%s)(%s)0;\n"
-    "    }\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the export and import
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        exportLocalBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] "
-    "= exportSrc[ get_global_id( 0 )*exportCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    // Do this to verify all kernels are done copying to the local buffer before
-    // we try the export and import
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t events;\n"
-    "    events = async_work_group_copy((__global "
-    "%s*)(exportDstImportSrc+exportSrcLocalSize*get_group_id(0)), (__local "
-    "const %s*)exportLocalBuffer, (size_t)exportSrcLocalSize, 0 );\n"
-    "    async_work_group_copy_fence( CLK_GLOBAL_MEM_FENCE );\n"
-    "    events = async_work_group_copy( (__local %s*)importLocalBuffer, "
-    "(__global const "
-    "%s*)(exportDstImportSrc+exportSrcLocalSize*get_group_id(0) + "
-    "(exportSrcLocalSize - importSrcLocalSize)), (size_t)importSrcLocalSize, "
-    "events );\n"
-    // Wait for the export and import to complete, then verify by manually
-    // copying to the dest
-    "    wait_group_events( 2, &events );\n"
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importDst[ get_global_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "importLocalBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    "}\n";
-static const char *import_after_export_aliased_global_and_local_kernel =
-    "#pragma OPENCL EXTENSION cl_khr_async_work_group_copy_fence : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *exportSrc, __global %s "
-    "*exportDstImportSrc,\n"
-    "                       __global %s *importDst, /* there isn't a dedicated "
-    "__global %s buffer for import src since export dst and import src are "
-    "aliased*/\n"
-    "                       __local %s *localBuffer, /* there isn't another "
-    "__local %s local buffer since export src and import dst are aliased*/\n"
-    "                       int exportSrcLocalSize, int "
-    "exportCopiesPerWorkItem,\n"
-    "                       int importSrcLocalSize, int "
-    "importCopiesPerWorkItem )\n"
-    "{\n"
-    "    int i;\n"
-    "    int localImportOffset = exportSrcLocalSize - importSrcLocalSize;\n"
-    // Zero the local storage first
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        localBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] = "
-    "(%s)(%s)0;\n"
-    "    }\n"
-    "    // no need to set another local buffer values to (%s)(%s)0 since "
-    "export src and import dst are aliased (use the same buffer)\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the export and import
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        localBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] = "
-    "exportSrc[ get_global_id( 0 )*exportCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    // Do this to verify all kernels are done copying to the local buffer before
-    // we try the export and import
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t events;\n"
-    "    events = async_work_group_copy((__global "
-    "%s*)(exportDstImportSrc+exportSrcLocalSize*get_group_id(0)), (__local "
-    "const %s*)localBuffer, (size_t)exportSrcLocalSize, 0 );\n"
-    "    async_work_group_copy_fence( CLK_GLOBAL_MEM_FENCE | "
-    "    events = async_work_group_copy( (__local "
-    "%s*)(localBuffer+localImportOffset), (__global const "
-    "%s*)(exportDstImportSrc+exportSrcLocalSize*get_group_id(0) + "
-    "(exportSrcLocalSize - importSrcLocalSize)), (size_t)importSrcLocalSize, "
-    "events );\n"
-    // Wait for the export and import to complete, then verify by manually
-    // copying to the dest
-    "    wait_group_events( 2, &events );\n"
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importDst[ get_global_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "(localBuffer+localImportOffset)[ get_local_id( 0 "
-    ")*importCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    "}\n";
-static const char *export_after_import_aliased_local_kernel =
-    "#pragma OPENCL EXTENSION cl_khr_async_work_group_copy_fence : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *importSrc, __global %s "
-    "*importDst,\n"
-    "                       const __global %s *exportDst, /* there isn't a "
-    "dedicated __global %s buffer for export src since the local memory is "
-    "aliased, so the export src is taken from it */\n"
-    "                       __local %s *localBuffer, /* there isn't another "
-    "__local %s local buffer since import dst and export src are aliased*/\n"
-    "                       int importSrcLocalSize, int "
-    "importCopiesPerWorkItem,\n"
-    "                       int exportSrcLocalSize, int "
-    "exportCopiesPerWorkItem )\n"
-    "{\n"
-    "    int i;\n"
-    // Zero the local storage first
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        localBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "(%s)(%s)0;\n"
-    "    }\n"
-    "    // no need to set another local buffer values to (%s)(%s)0 since "
-    "import dst and export src are aliased (use the same buffer)\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the import and export
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t events;\n"
-    "    events = async_work_group_copy( (__local %s*)localBuffer, (__global "
-    "const %s*)(importSrc+importSrcLocalSize*get_group_id(0)), "
-    "(size_t)importSrcLocalSize, events );\n"
-    "    async_work_group_copy_fence( CLK_LOCAL_MEM_FENCE );\n"
-    "    events = async_work_group_copy((__global "
-    "%s*)(exportDst+exportSrcLocalSize*get_group_id(0)), (__local const "
-    "%s*)(localBuffer + (importSrcLocalSize - exportSrcLocalSize)), "
-    "(size_t)exportSrcLocalSize, 0 );\n"
-    // Wait for the import and export to complete, then verify by manually
-    // copying to the dest
-    "    wait_group_events( 2, &events );\n"
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importDst[ get_global_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "localBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    "}\n";
-static const char *export_after_import_aliased_global_kernel =
-    "#pragma OPENCL EXTENSION cl_khr_async_work_group_copy_fence : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *importSrcExportDst, __global %s "
-    "*importDst,\n"
-    "                       const __global %s *exportSrc,\n"
-    "                       /* there isn't a dedicated __global %s buffer for "
-    "export dst since import src and export dst are aliased */\n"
-    "                       __local %s *importLocalBuffer, __local %s "
-    "*exportLocalBuffer,\n"
-    "                       int importSrcLocalSize, int "
-    "importCopiesPerWorkItem,\n"
-    "                       int exportSrcLocalSize, int "
-    "exportCopiesPerWorkItem )\n"
-    "{\n"
-    "    int i;\n"
-    // Zero the local storage first
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importLocalBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ] "
-    "= (%s)(%s)0;\n"
-    "    }\n"
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        exportLocalBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] "
-    "= (%s)(%s)0;\n"
-    "    }\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the import and export
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    for(i=0; i<exportCopiesPerWorkItem; i++) {\n"
-    "        exportLocalBuffer[ get_local_id( 0 )*exportCopiesPerWorkItem+i ] "
-    "= exportSrc[ get_global_id( 0 )*exportCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    // Do this to verify all kernels are done copying to the local buffer before
-    // we try the import and export
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t events;\n"
-    "    events = async_work_group_copy( (__local %s*)importLocalBuffer, "
-    "(__global const "
-    "%s*)(importSrcExportDst+importSrcLocalSize*get_group_id(0)), "
-    "(size_t)importSrcLocalSize, 0 );\n"
-    "    async_work_group_copy_fence( CLK_GLOBAL_MEM_FENCE );\n"
-    "    events = async_work_group_copy((__global "
-    "%s*)(importSrcExportDst+importSrcLocalSize*get_group_id(0) + "
-    "(importSrcLocalSize - exportSrcLocalSize)), (__local const "
-    "%s*)exportLocalBuffer, (size_t)exportSrcLocalSize, events );\n"
-    // Wait for the import and export to complete, then verify by manually
-    // copying to the dest
-    "    wait_group_events( 2, &events );\n"
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importDst[ get_global_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "importLocalBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    "}\n";
-static const char *export_after_import_aliased_global_and_local_kernel =
-    "#pragma OPENCL EXTENSION cl_khr_async_work_group_copy_fence : enable\n"
-    "%s\n" // optional pragma string
-    "__kernel void test_fn( const __global %s *importSrcExportDst, __global %s "
-    "*importDst,\n"
-    "                       /* there isn't a dedicated __global %s buffer for "
-    "export src since the local memory is aliased, so the export src is taken "
-    "from it */\n"
-    "                       /* there isn't a dedicated __global %s buffer for "
-    "export dst since import src and export dst are aliased */\n"
-    "                       __local %s *localBuffer, /* there isn't another "
-    "__local %s local buffer since import dst and export src are aliased*/\n"
-    "                       int importSrcLocalSize, int "
-    "importCopiesPerWorkItem,\n"
-    "                       int exportSrcLocalSize, int "
-    "exportCopiesPerWorkItem )\n"
-    "{\n"
-    "    int i;\n"
-    // Zero the local storage first
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        localBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "(%s)(%s)0;\n"
-    "    }\n"
-    "    // no need to set another local buffer values to (%s)(%s)0 since "
-    "import dst and export src are aliased (use the same buffer)\n"
-    // Do this to verify all kernels are done zeroing the local buffer before we
-    // try the import and export
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "    event_t events;\n"
-    "    events = async_work_group_copy( (__local %s*)localBuffer, (__global "
-    "const %s*)(importSrcExportDst+importSrcLocalSize*get_group_id(0)), "
-    "(size_t)importSrcLocalSize, 0 );\n"
-    "    async_work_group_copy_fence( CLK_GLOBAL_MEM_FENCE | "
-    "    events = async_work_group_copy((__global "
-    "%s*)(importSrcExportDst+importSrcLocalSize*get_group_id(0) + "
-    "(importSrcLocalSize - exportSrcLocalSize)), (__local const "
-    "%s*)(localBuffer + (importSrcLocalSize - exportSrcLocalSize)), "
-    "(size_t)exportSrcLocalSize, events );\n"
-    // Wait for the import and export to complete, then verify by manually
-    // copying to the dest
-    "    wait_group_events( 2, &events );\n"
-    "    for(i=0; i<importCopiesPerWorkItem; i++) {\n"
-    "        importDst[ get_global_id( 0 )*importCopiesPerWorkItem+i ] = "
-    "localBuffer[ get_local_id( 0 )*importCopiesPerWorkItem+i ];\n"
-    "    }\n"
-    "}\n";
-int test_copy_fence(cl_device_id deviceID, cl_context context,
-                    cl_command_queue queue, const char *kernelCode,
-                    ExplicitType vecType, int vecSize, bool export_after_import,
-                    bool aliased_local_mem, bool aliased_global_mem)
-    int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[4];
-    size_t threads[1], localThreads[1];
-    void *transaction1InBuffer, *transaction1OutBuffer, *transaction2InBuffer,
-        *transaction2OutBuffer;
-    MTdata d;
-    bool transaction1DstIsTransaction2Src =
-        (aliased_global_mem && !export_after_import)
-        || (aliased_local_mem && export_after_import);
-    bool transaction1SrcIsTransaction2Dst =
-        aliased_global_mem && export_after_import;
-    char vecNameString[64];
-    vecNameString[0] = 0;
-    if (vecSize == 1)
-        sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
-    else
-        sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType),
-                vecSize);
-    size_t elementSize = get_explicit_type_size(vecType) * vecSize;
-    log_info("Testing %s\n", vecNameString);
-    cl_long max_local_mem_size;
-    error =
-        clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE,
-                        sizeof(max_local_mem_size), &max_local_mem_size, NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");
-    unsigned int num_of_compute_devices;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS,
-                            sizeof(num_of_compute_devices),
-                            &num_of_compute_devices, NULL);
-    test_error(error,
-               "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
-    char programSource[4096];
-    programSource[0] = 0;
-    char *programPtr;
-    sprintf(programSource, kernelCode,
-            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
-                               : "",
-            vecNameString, vecNameString, vecNameString, vecNameString,
-            vecNameString, vecNameString, vecNameString,
-            get_explicit_type_name(vecType), vecNameString,
-            get_explicit_type_name(vecType), vecNameString, vecNameString,
-            vecNameString, vecNameString);
-    // log_info("program: %s\n", programSource);
-    programPtr = programSource;
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        (const char **)&programPtr, "test_fn");
-    test_error(error, "Unable to create testing kernel");
-    size_t max_workgroup_size;
-    error = clGetKernelWorkGroupInfo(
-        kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size),
-        &max_workgroup_size, NULL);
-    test_error(
-        error,
-        "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");
-    size_t max_local_workgroup_size[3];
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES,
-                            sizeof(max_local_workgroup_size),
-                            max_local_workgroup_size, NULL);
-    test_error(error,
-               "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
-    // Pick the minimum of the device and the kernel
-    if (max_workgroup_size > max_local_workgroup_size[0])
-        max_workgroup_size = max_local_workgroup_size[0];
-    size_t transaction1NumberOfCopiesPerWorkitem = 13;
-    size_t transaction2NumberOfCopiesPerWorkitem = 2;
-    elementSize =
-        get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
-    size_t localStorageSpacePerWorkitem =
-        transaction1NumberOfCopiesPerWorkitem * elementSize
-        + (aliased_local_mem
-               ? 0
-               : transaction2NumberOfCopiesPerWorkitem * elementSize);
-    size_t maxLocalWorkgroupSize =
-        (((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem);
-    // Calculation can return 0 on embedded devices due to 1KB local mem limit
-    if (maxLocalWorkgroupSize == 0)
-    {
-        maxLocalWorkgroupSize = 1;
-    }
-    size_t localWorkgroupSize = maxLocalWorkgroupSize;
-    if (maxLocalWorkgroupSize > max_workgroup_size)
-        localWorkgroupSize = max_workgroup_size;
-    size_t transaction1LocalBufferSize = localWorkgroupSize * elementSize
-        * transaction1NumberOfCopiesPerWorkitem;
-    size_t transaction2LocalBufferSize = localWorkgroupSize * elementSize
-        * transaction2NumberOfCopiesPerWorkitem; // irrelevant if
-                                                 // aliased_local_mem
-    size_t numberOfLocalWorkgroups = 1111;
-    size_t transaction1GlobalBufferSize =
-        numberOfLocalWorkgroups * transaction1LocalBufferSize;
-    size_t transaction2GlobalBufferSize =
-        numberOfLocalWorkgroups * transaction2LocalBufferSize;
-    size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize;
-    transaction1InBuffer = (void *)malloc(transaction1GlobalBufferSize);
-    transaction1OutBuffer = (void *)malloc(transaction1GlobalBufferSize);
-    transaction2InBuffer = (void *)malloc(transaction2GlobalBufferSize);
-    transaction2OutBuffer = (void *)malloc(transaction2GlobalBufferSize);
-    memset(transaction1OutBuffer, 0, transaction1GlobalBufferSize);
-    memset(transaction2OutBuffer, 0, transaction2GlobalBufferSize);
-    cl_int transaction1CopiesPerWorkitemInt, transaction1CopiesPerWorkgroup,
-        transaction2CopiesPerWorkitemInt, transaction2CopiesPerWorkgroup;
-    transaction1CopiesPerWorkitemInt =
-        (int)transaction1NumberOfCopiesPerWorkitem;
-    transaction1CopiesPerWorkgroup =
-        (int)(transaction1NumberOfCopiesPerWorkitem * localWorkgroupSize);
-    transaction2CopiesPerWorkitemInt =
-        (int)transaction2NumberOfCopiesPerWorkitem;
-    transaction2CopiesPerWorkgroup =
-        (int)(transaction2NumberOfCopiesPerWorkitem * localWorkgroupSize);
-    log_info(
-        "Global: %d, local %d. 1st Transaction: local buffer %db, global "
-        "buffer %db, each work group will copy %d elements and each work "
-        "item item will copy %d elements. 2nd Transaction: local buffer "
-        "%db, global buffer %db, each work group will copy %d elements and "
-        "each work item will copy %d elements\n",
-        (int)globalWorkgroupSize, (int)localWorkgroupSize,
-        (int)transaction1LocalBufferSize, (int)transaction1GlobalBufferSize,
-        transaction1CopiesPerWorkgroup, transaction1CopiesPerWorkitemInt,
-        (int)transaction2LocalBufferSize, (int)transaction2GlobalBufferSize,
-        transaction2CopiesPerWorkgroup, transaction2CopiesPerWorkitemInt);
-    threads[0] = globalWorkgroupSize;
-    localThreads[0] = localWorkgroupSize;
-    d = init_genrand(gRandomSeed);
-    generate_random_data(
-        vecType, transaction1GlobalBufferSize / get_explicit_type_size(vecType),
-        d, transaction1InBuffer);
-    if (!transaction1DstIsTransaction2Src)
-    {
-        generate_random_data(vecType,
-                             transaction2GlobalBufferSize
-                                 / get_explicit_type_size(vecType),
-                             d, transaction2InBuffer);
-    }
-    free_mtdata(d);
-    d = NULL;
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                transaction1GlobalBufferSize,
-                                transaction1InBuffer, &error);
-    test_error(error, "Unable to create input buffer");
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                transaction1GlobalBufferSize,
-                                transaction1OutBuffer, &error);
-    test_error(error, "Unable to create output buffer");
-    if (!transaction1DstIsTransaction2Src)
-    {
-        streams[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                    transaction2GlobalBufferSize,
-                                    transaction2InBuffer, &error);
-        test_error(error, "Unable to create input buffer");
-    }
-    if (!transaction1SrcIsTransaction2Dst)
-    {
-        streams[3] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                    transaction2GlobalBufferSize,
-                                    transaction2OutBuffer, &error);
-        test_error(error, "Unable to create output buffer");
-    }
-    cl_uint argIndex = 0;
-    error = clSetKernelArg(kernel, argIndex, sizeof(streams[0]), &streams[0]);
-    test_error(error, "Unable to set kernel argument");
-    ++argIndex;
-    error = clSetKernelArg(kernel, argIndex, sizeof(streams[1]), &streams[1]);
-    test_error(error, "Unable to set kernel argument");
-    ++argIndex;
-    if (!transaction1DstIsTransaction2Src)
-    {
-        error =
-            clSetKernelArg(kernel, argIndex, sizeof(streams[2]), &streams[2]);
-        test_error(error, "Unable to set kernel argument");
-        ++argIndex;
-    }
-    if (!transaction1SrcIsTransaction2Dst)
-    {
-        error =
-            clSetKernelArg(kernel, argIndex, sizeof(streams[3]), &streams[3]);
-        test_error(error, "Unable to set kernel argument");
-        ++argIndex;
-    }
-    error = clSetKernelArg(kernel, argIndex, transaction1LocalBufferSize, NULL);
-    test_error(error, "Unable to set kernel argument");
-    ++argIndex;
-    if (!aliased_local_mem)
-    {
-        error =
-            clSetKernelArg(kernel, argIndex, transaction2LocalBufferSize, NULL);
-        test_error(error, "Unable to set kernel argument");
-        ++argIndex;
-    }
-    error =
-        clSetKernelArg(kernel, argIndex, sizeof(transaction1CopiesPerWorkgroup),
-                       &transaction1CopiesPerWorkgroup);
-    test_error(error, "Unable to set kernel argument");
-    ++argIndex;
-    error = clSetKernelArg(kernel, argIndex,
-                           sizeof(transaction1CopiesPerWorkitemInt),
-                           &transaction1CopiesPerWorkitemInt);
-    test_error(error, "Unable to set kernel argument");
-    ++argIndex;
-    error =
-        clSetKernelArg(kernel, argIndex, sizeof(transaction2CopiesPerWorkgroup),
-                       &transaction2CopiesPerWorkgroup);
-    test_error(error, "Unable to set kernel argument");
-    ++argIndex;
-    error = clSetKernelArg(kernel, argIndex,
-                           sizeof(transaction2CopiesPerWorkitemInt),
-                           &transaction2CopiesPerWorkitemInt);
-    test_error(error, "Unable to set kernel argument");
-    // Enqueue
-    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
-                                   localThreads, 0, NULL, NULL);
-    test_error(error, "Unable to queue kernel");
-    // Read
-    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0,
-                                transaction1GlobalBufferSize,
-                                transaction1OutBuffer, 0, NULL, NULL);
-    test_error(error, "Unable to read results");
-    if (transaction1DstIsTransaction2Src)
-    {
-        for (size_t idx = 0; idx < numberOfLocalWorkgroups; idx++)
-        {
-            memcpy(
-                (void *)((unsigned char *)transaction2InBuffer
-                         + idx * transaction2CopiesPerWorkgroup * elementSize),
-                (const void *)((unsigned char *)transaction1OutBuffer
-                               + (idx * transaction1CopiesPerWorkgroup
-                                  + (transaction1CopiesPerWorkgroup
-                                     - transaction2CopiesPerWorkgroup))
-                                   * elementSize),
-                (size_t)transaction2CopiesPerWorkgroup * elementSize);
-        }
-    }
-    if (transaction1SrcIsTransaction2Dst)
-    {
-        void *transaction1SrcBuffer =
-            (void *)malloc(transaction1GlobalBufferSize);
-        error = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0,
-                                    transaction1GlobalBufferSize,
-                                    transaction1SrcBuffer, 0, NULL, NULL);
-        test_error(error, "Unable to read results");
-        for (size_t idx = 0; idx < numberOfLocalWorkgroups; idx++)
-        {
-            memcpy(
-                (void *)((unsigned char *)transaction2OutBuffer
-                         + idx * transaction2CopiesPerWorkgroup * elementSize),
-                (const void *)((unsigned char *)transaction1SrcBuffer
-                               + (idx * transaction1CopiesPerWorkgroup
-                                  + (transaction1CopiesPerWorkgroup
-                                     - transaction2CopiesPerWorkgroup))
-                                   * elementSize),
-                (size_t)transaction2CopiesPerWorkgroup * elementSize);
-        }
-        free(transaction1SrcBuffer);
-    }
-    else
-    {
-        error = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0,
-                                    transaction2GlobalBufferSize,
-                                    transaction2OutBuffer, 0, NULL, NULL);
-        test_error(error, "Unable to read results");
-    }
-    // Verify
-    int failuresPrinted = 0;
-    if (memcmp(transaction1InBuffer, transaction1OutBuffer,
-               transaction1GlobalBufferSize)
-        != 0)
-    {
-        size_t typeSize = get_explicit_type_size(vecType) * vecSize;
-        unsigned char *inchar = (unsigned char *)transaction1InBuffer;
-        unsigned char *outchar = (unsigned char *)transaction1OutBuffer;
-        for (int i = 0; i < (int)transaction1GlobalBufferSize;
-             i += (int)elementSize)
-        {
-            if (memcmp(((char *)inchar) + i, ((char *)outchar) + i, typeSize)
-                != 0)
-            {
-                char values[4096];
-                values[0] = 0;
-                if (failuresPrinted == 0)
-                {
-                    // Print first failure message
-                    log_error("ERROR: Results of 1st transaction did not "
-                              "validate!\n");
-                }
-                sprintf(values + strlen(values), "%d -> [", i);
-                for (int j = 0; j < (int)elementSize; j++)
-                    sprintf(values + strlen(values), "%2x ", inchar[i + j]);
-                sprintf(values + strlen(values), "] != [");
-                for (int j = 0; j < (int)elementSize; j++)
-                    sprintf(values + strlen(values), "%2x ", outchar[i + j]);
-                sprintf(values + strlen(values), "]");
-                log_error("%s\n", values);
-                failuresPrinted++;
-            }
-            if (failuresPrinted > 5)
-            {
-                log_error("Not printing further failures...\n");
-                break;
-            }
-        }
-    }
-    if (memcmp(transaction2InBuffer, transaction2OutBuffer,
-               transaction2GlobalBufferSize)
-        != 0)
-    {
-        size_t typeSize = get_explicit_type_size(vecType) * vecSize;
-        unsigned char *inchar = (unsigned char *)transaction2InBuffer;
-        unsigned char *outchar = (unsigned char *)transaction2OutBuffer;
-        for (int i = 0; i < (int)transaction2GlobalBufferSize;
-             i += (int)elementSize)
-        {
-            if (memcmp(((char *)inchar) + i, ((char *)outchar) + i, typeSize)
-                != 0)
-            {
-                char values[4096];
-                values[0] = 0;
-                if (failuresPrinted == 0)
-                {
-                    // Print first failure message
-                    log_error("ERROR: Results of 2nd transaction did not "
-                              "validate!\n");
-                }
-                sprintf(values + strlen(values), "%d -> [", i);
-                for (int j = 0; j < (int)elementSize; j++)
-                    sprintf(values + strlen(values), "%2x ", inchar[i + j]);
-                sprintf(values + strlen(values), "] != [");
-                for (int j = 0; j < (int)elementSize; j++)
-                    sprintf(values + strlen(values), "%2x ", outchar[i + j]);
-                sprintf(values + strlen(values), "]");
-                log_error("%s\n", values);
-                failuresPrinted++;
-            }
-            if (failuresPrinted > 5)
-            {
-                log_error("Not printing further failures...\n");
-                break;
-            }
-        }
-    }
-    free(transaction1InBuffer);
-    free(transaction1OutBuffer);
-    free(transaction2InBuffer);
-    free(transaction2OutBuffer);
-    return failuresPrinted ? -1 : 0;
-int test_copy_fence_all_types(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, const char *kernelCode,
-                              bool export_after_import, bool aliased_local_mem,
-                              bool aliased_global_mem)
-    ExplicitType vecType[] = {
-        kChar,  kUChar, kShort,  kUShort,          kInt, kUInt, kLong,
-        kULong, kFloat, kDouble, kNumExplicitTypes
-    };
-    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
-    unsigned int size, typeIndex;
-    int errors = 0;
-    if (!is_extension_available(deviceID, "cl_khr_async_work_group_copy_fence"))
-    {
-        log_info(
-            "Device does not support extended async copies fence. Skipping "
-            "test.\n");
-        return 0;
-    }
-    for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
-    {
-        if (vecType[typeIndex] == kDouble
-            && !is_extension_available(deviceID, "cl_khr_fp64"))
-            continue;
-        if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong)
-            && !gHasLong)
-            continue;
-        for (size = 0; vecSizes[size] != 0; size++)
-        {
-            if (test_copy_fence(deviceID, context, queue, kernelCode,
-                                vecType[typeIndex], vecSizes[size],
-                                export_after_import, aliased_local_mem,
-                                aliased_global_mem))
-            {
-                errors++;
-            }
-        }
-    }
-    if (errors) return -1;
-    return 0;
-int test_async_work_group_copy_fence_import_after_export_aliased_local(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements)
-    return test_copy_fence_all_types(deviceID, context, queue,
-                                     import_after_export_aliased_local_kernel,
-                                     false, true, false);
-int test_async_work_group_copy_fence_import_after_export_aliased_global(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements)
-    return test_copy_fence_all_types(deviceID, context, queue,
-                                     import_after_export_aliased_global_kernel,
-                                     false, false, true);
-int test_async_work_group_copy_fence_import_after_export_aliased_global_and_local(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements)
-    return test_copy_fence_all_types(
-        deviceID, context, queue,
-        import_after_export_aliased_global_and_local_kernel, false, true, true);
-int test_async_work_group_copy_fence_export_after_import_aliased_local(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements)
-    return test_copy_fence_all_types(deviceID, context, queue,
-                                     export_after_import_aliased_local_kernel,
-                                     true, true, false);
-int test_async_work_group_copy_fence_export_after_import_aliased_global(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements)
-    return test_copy_fence_all_types(deviceID, context, queue,
-                                     export_after_import_aliased_global_kernel,
-                                     true, false, true);
-int test_async_work_group_copy_fence_export_after_import_aliased_global_and_local(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements)
-    return test_copy_fence_all_types(
-        deviceID, context, queue,
-        export_after_import_aliased_global_and_local_kernel, true, true, true);
diff --git a/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/basic/test_async_strided_copy.cpp
index c456f38..fe76c84 100644
--- a/test_conformance/basic/test_async_strided_copy.cpp
+++ b/test_conformance/basic/test_async_strided_copy.cpp
@@ -215,8 +215,6 @@
                 sprintf(values + strlen( values), "%2x ", outchar[j]);
             sprintf(values + strlen(values), "]");
             log_error("%s\n", values);
-            free(inBuffer);
-            free(outBuffer);
             return -1;
diff --git a/test_conformance/basic/test_barrier.cpp b/test_conformance/basic/test_barrier.cpp
index d20af14..822b8eb 100644
--- a/test_conformance/basic/test_barrier.cpp
+++ b/test_conformance/basic/test_barrier.cpp
@@ -108,15 +108,11 @@
     input_ptr = (int*)malloc(sizeof(int) * num_elements);
     output_ptr = (int*)malloc(sizeof(int));
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, &err);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, &err);
     test_error(err, "clCreateBuffer failed.");
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int), NULL, &err);
     test_error(err, "clCreateBuffer failed.");
-    streams[2] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_int) * max_threadgroup_size, NULL, &err);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * max_threadgroup_size, NULL, &err);
     test_error(err, "clCreateBuffer failed.");
     d = init_genrand( gRandomSeed );
diff --git a/test_conformance/basic/test_basic_parameter_types.cpp b/test_conformance/basic/test_basic_parameter_types.cpp
index 6e99d46..886da6a 100644
--- a/test_conformance/basic/test_basic_parameter_types.cpp
+++ b/test_conformance/basic/test_basic_parameter_types.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -23,381 +23,279 @@
 #include "procs.h"
-const char *kernel_code = R"(
-__kernel void test_kernel(
-char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f,
-__global float%s *result)
+const char *kernel_code =
+"__kernel void test_kernel(\n"
+"char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f,\n"
+"__global float%s *result)\n"
+"  result[0] = %s(c);\n"
+"  result[1] = %s(uc);\n"
+"  result[2] = %s(s);\n"
+"  result[3] = %s(us);\n"
+"  result[4] = %s(i);\n"
+"  result[5] = %s(ui);\n"
+"  result[6] = f;\n"
+const char *kernel_code_long =
+"__kernel void test_kernel_long(\n"
+"long%s l, ulong%s ul,\n"
+"__global float%s *result)\n"
+"  result[0] = %s(l);\n"
+"  result[1] = %s(ul);\n"
+int test_parameter_types_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-    result[0] = %s(c);
-    result[1] = %s(uc);
-    result[2] = %s(s);
-    result[3] = %s(us);
-    result[4] = %s(i);
-    result[5] = %s(ui);
-    result[6] = f;
+  clMemWrapper results;
+  int error;
+  size_t global[3] = {1, 1, 1};
+  float results_back[2*16];
+  int count, index;
+  const char* types[] = { "long", "ulong" };
+  char kernel_string[8192];
+  int sizes[] = {1, 2, 4, 8, 16};
+  const char* size_strings[] = {"", "2", "4", "8", "16"};
+  float expected;
+  int total_errors = 0;
+  int size_to_test;
+  char *ptr;
+  char convert_string[1024];
+  size_t max_parameter_size;
-const char *kernel_code_long = R"(
-__kernel void test_kernel_long(
-long%s l, ulong%s ul,
-__global float%s *result)
-    result[0] = %s(l);
-    result[1] = %s(ul);
+  // We don't really care about the contents since we're just testing that the types work.
+  cl_long l[16]={-21,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
+  cl_ulong ul[16]={22,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
-int test_parameter_types_long(cl_device_id device, cl_context context,
-                              cl_command_queue queue, int num_elements)
-    clMemWrapper results;
-    int error;
-    size_t global[3] = { 1, 1, 1 };
-    float results_back[2 * 16];
-    int count, index;
-    const char *types[] = { "long", "ulong" };
-    char kernel_string[8192];
-    int sizes[] = { 1, 2, 4, 8, 16 };
-    const char *size_strings[] = { "", "2", "4", "8", "16" };
-    float expected;
-    int total_errors = 0;
-    int size_to_test;
-    char *ptr;
-    char convert_string[1024];
-    size_t max_parameter_size;
+  // Calculate how large our paramter size is to the kernel
+  size_t parameter_size = sizeof(cl_long) + sizeof(cl_ulong);
-    // We don't really care about the contents since we're just testing that the
-    // types work.
-    cl_long l[16] = { -21, -1, 2,  -3,  4,  -5,  6,  -7,
-                      8,   -9, 10, -11, 12, -13, 14, -15 };
-    cl_ulong ul[16] = { 22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+  // Init our strings.
+  kernel_string[0] = '\0';
+  convert_string[0] = '\0';
-    // Calculate how large our paramter size is to the kernel
-    size_t parameter_size = sizeof(cl_long) + sizeof(cl_ulong);
+  // Get the maximum parameter size allowed
+  error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
+    test_error( error, "Unable to get max parameter size from device" );
-    // Init our strings.
-    kernel_string[0] = '\0';
-    convert_string[0] = '\0';
+  // Create the results buffer
+  results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*2*16, NULL, &error);
+  test_error(error, "clCreateBuffer failed");
-    // Get the maximum parameter size allowed
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_MAX_PARAMETER_SIZE,
-                        sizeof(max_parameter_size), &max_parameter_size, NULL);
-    test_error(error, "Unable to get max parameter size from device");
+  // Go over all the vector sizes
+  for (size_to_test = 0; size_to_test < 5; size_to_test++) {
+    clProgramWrapper program;
+    clKernelWrapper kernel;
-    // Create the results buffer
-    results = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                             sizeof(cl_float) * 2 * 16, NULL, &error);
-    test_error(error, "clCreateBuffer failed");
-    // Go over all the vector sizes
-    for (size_to_test = 0; size_to_test < 5; size_to_test++)
-    {
-        clProgramWrapper program;
-        clKernelWrapper kernel;
-        size_t total_parameter_size =
-            parameter_size * sizes[size_to_test] + sizeof(cl_mem);
-        if (total_parameter_size > max_parameter_size)
-        {
-            log_info(
-                "Can not test with vector size %d because it would exceed the "
-                "maximum allowed parameter size to the kernel. (%d > %d)\n",
-                (int)sizes[size_to_test], (int)total_parameter_size,
-                (int)max_parameter_size);
-            continue;
-        }
-        log_info("Testing vector size %d\n", sizes[size_to_test]);
-        // If size is > 1, then we need a explicit convert call.
-        if (sizes[size_to_test] > 1)
-        {
-            sprintf(convert_string, "convert_float%s",
-                    size_strings[size_to_test]);
-        }
-        else
-        {
-            sprintf(convert_string, " ");
-        }
-        // Build the kernel
-        sprintf(kernel_string, kernel_code_long, size_strings[size_to_test],
-                size_strings[size_to_test], size_strings[size_to_test],
-                convert_string, convert_string);
-        ptr = kernel_string;
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            (const char **)&ptr,
-                                            "test_kernel_long");
-        test_error(error, "create single kernel failed");
-        // Set the arguments
-        for (count = 0; count < 2; count++)
-        {
-            switch (count)
-            {
-                case 0:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_long) * sizes[size_to_test],
-                        &l);
-                    break;
-                case 1:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_ulong) * sizes[size_to_test],
-                        &ul);
-                    break;
-                default: log_error("Test error"); break;
-            }
-            if (error)
-                log_error("Setting kernel arg %d %s%s: ", count, types[count],
-                          size_strings[size_to_test]);
-            test_error(error, "clSetKernelArgs failed");
-        }
-        error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results);
-        test_error(error, "clSetKernelArgs failed");
-        // Execute
-        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0,
-                                       NULL, NULL);
-        test_error(error, "clEnqueueNDRangeKernel failed");
-        // Read back the results
-        error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0,
-                                    sizeof(cl_float) * 2 * 16, results_back, 0,
-                                    NULL, NULL);
-        test_error(error, "clEnqueueReadBuffer failed");
-        // Verify the results
-        for (count = 0; count < 2; count++)
-        {
-            for (index = 0; index < sizes[size_to_test]; index++)
-            {
-                switch (count)
-                {
-                    case 0: expected = (float)l[index]; break;
-                    case 1: expected = (float)ul[index]; break;
-                    default: log_error("Test error"); break;
-                }
-                if (results_back[count * sizes[size_to_test] + index]
-                    != expected)
-                {
-                    total_errors++;
-                    log_error("Conversion from %s%s failed: index %d got %g, "
-                              "expected %g.\n",
-                              types[count], size_strings[size_to_test], index,
-                              results_back[count * sizes[size_to_test] + index],
-                              expected);
-                }
-            }
-        }
+    size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem);
+    if (total_parameter_size > max_parameter_size) {
+      log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
+               (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size);
+      continue;
-    return total_errors;
+    log_info("Testing vector size %d\n", sizes[size_to_test]);
+    // If size is > 1, then we need a explicit convert call.
+    if (sizes[size_to_test] > 1) {
+      sprintf(convert_string, "convert_float%s",  size_strings[size_to_test]);
+    } else {
+      sprintf(convert_string, " ");
+    }
+    // Build the kernel
+    sprintf(kernel_string, kernel_code_long,
+            size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
+            convert_string, convert_string
+    );
+    ptr = kernel_string;
+    error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel_long");
+    test_error(error, "create single kernel failed");
+    // Set the arguments
+    for (count = 0; count < 2; count++) {
+      switch (count) {
+        case 0: error = clSetKernelArg(kernel, count, sizeof(cl_long)*sizes[size_to_test], &l); break;
+        case 1: error = clSetKernelArg(kernel, count, sizeof(cl_ulong)*sizes[size_to_test], &ul); break;
+        default: log_error("Test error"); break;
+      }
+      if (error)
+        log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]);
+      test_error(error, "clSetKernelArgs failed");
+    }
+    error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results);
+    test_error(error, "clSetKernelArgs failed");
+    // Execute
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
+    test_error(error, "clEnqueueNDRangeKernel failed");
+    // Read back the results
+    error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*2*16, results_back, 0, NULL, NULL);
+    test_error(error, "clEnqueueReadBuffer failed");
+    // Verify the results
+    for (count = 0; count < 2; count++) {
+      for (index=0; index < sizes[size_to_test]; index++) {
+        switch (count) {
+          case 0: expected = (float)l[index]; break;
+          case 1: expected = (float)ul[index]; break;
+          default: log_error("Test error"); break;
+        }
+        if (results_back[count*sizes[size_to_test]+index] != expected) {
+          total_errors++;
+          log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test],
+                    index, results_back[count*sizes[size_to_test]+index], expected);
+        }
+      }
+    }
+  }
+  return total_errors;
-int test_parameter_types(cl_device_id device, cl_context context,
-                         cl_command_queue queue, int num_elements)
+int test_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-    clMemWrapper results;
-    int error;
-    size_t global[3] = { 1, 1, 1 };
-    float results_back[7 * 16];
-    int count, index;
-    const char *types[] = { "char", "uchar", "short", "ushort",
-                            "int",  "uint",  "float" };
-    char kernel_string[8192];
-    int sizes[] = { 1, 2, 4, 8, 16 };
-    const char *size_strings[] = { "", "2", "4", "8", "16" };
-    float expected;
-    int total_errors = 0;
-    int size_to_test;
-    char *ptr;
-    char convert_string[1024];
-    size_t max_parameter_size;
+  clMemWrapper results;
+  int error;
+  size_t global[3] = {1, 1, 1};
+  float results_back[7*16];
+  int count, index;
+  const char* types[] = {"char", "uchar", "short", "ushort", "int", "uint", "float"};
+  char kernel_string[8192];
+  int sizes[] = {1, 2, 4, 8, 16};
+  const char* size_strings[] = {"", "2", "4", "8", "16"};
+  float expected;
+  int total_errors = 0;
+  int size_to_test;
+  char *ptr;
+  char convert_string[1024];
+  size_t max_parameter_size;
-    // We don't really care about the contents since we're just testing that the
-    // types work.
-    cl_char c[16] = { 0, -1, 2,  -3,  4,  -5,  6,  -7,
-                      8, -9, 10, -11, 12, -13, 14, -15 };
-    cl_uchar uc[16] = { 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
-    cl_short s[16] = { -17, -1, 2,  -3,  4,  -5,  6,  -7,
-                       8,   -9, 10, -11, 12, -13, 14, -15 };
-    cl_ushort us[16] = {
-        18, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
-    };
-    cl_int i[16] = { -19, -1, 2,  -3,  4,  -5,  6,  -7,
-                     8,   -9, 10, -11, 12, -13, 14, -15 };
-    cl_uint ui[16] = { 20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
-    cl_float f[16] = { -23, -1, 2,  -3,  4,  -5,  6,  -7,
-                       8,   -9, 10, -11, 12, -13, 14, -15 };
+  // We don't really care about the contents since we're just testing that the types work.
+  cl_char c[16]={0,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
+  cl_uchar uc[16]={16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  cl_short s[16]={-17,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
+  cl_ushort us[16]={18,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  cl_int i[16]={-19,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
+  cl_uint ui[16]={20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  cl_float f[16]={-23,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
-    // Calculate how large our paramter size is to the kernel
-    size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar)
-        + sizeof(cl_short) + sizeof(cl_ushort) + sizeof(cl_int)
-        + sizeof(cl_uint) + sizeof(cl_float);
+  // Calculate how large our paramter size is to the kernel
+  size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar) +
+  sizeof(cl_short) +sizeof(cl_ushort) +
+  sizeof(cl_int) +sizeof(cl_uint) +
+  sizeof(cl_float);
-    // Init our strings.
-    kernel_string[0] = '\0';
-    convert_string[0] = '\0';
+  // Init our strings.
+  kernel_string[0] = '\0';
+  convert_string[0] = '\0';
-    // Get the maximum parameter size allowed
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_MAX_PARAMETER_SIZE,
-                        sizeof(max_parameter_size), &max_parameter_size, NULL);
-    test_error(error, "Unable to get max parameter size from device");
+  // Get the maximum parameter size allowed
+  error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
+    test_error( error, "Unable to get max parameter size from device" );
-    // Create the results buffer
-    results = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                             sizeof(cl_float) * 7 * 16, NULL, &error);
-    test_error(error, "clCreateBuffer failed");
+  // Create the results buffer
+  results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*7*16, NULL, &error);
+  test_error(error, "clCreateBuffer failed");
-    // Go over all the vector sizes
-    for (size_to_test = 0; size_to_test < 5; size_to_test++)
-    {
-        clProgramWrapper program;
-        clKernelWrapper kernel;
+  // Go over all the vector sizes
+  for (size_to_test = 0; size_to_test < 5; size_to_test++) {
+    clProgramWrapper program;
+    clKernelWrapper kernel;
-        size_t total_parameter_size =
-            parameter_size * sizes[size_to_test] + sizeof(cl_mem);
-        if (total_parameter_size > max_parameter_size)
-        {
-            log_info(
-                "Can not test with vector size %d because it would exceed the "
-                "maximum allowed parameter size to the kernel. (%d > %d)\n",
-                (int)sizes[size_to_test], (int)total_parameter_size,
-                (int)max_parameter_size);
-            continue;
-        }
-        log_info("Testing vector size %d\n", sizes[size_to_test]);
-        // If size is > 1, then we need a explicit convert call.
-        if (sizes[size_to_test] > 1)
-        {
-            sprintf(convert_string, "convert_float%s",
-                    size_strings[size_to_test]);
-        }
-        else
-        {
-            sprintf(convert_string, " ");
-        }
-        // Build the kernel
-        sprintf(kernel_string, kernel_code, size_strings[size_to_test],
-                size_strings[size_to_test], size_strings[size_to_test],
-                size_strings[size_to_test], size_strings[size_to_test],
-                size_strings[size_to_test], size_strings[size_to_test],
-                size_strings[size_to_test], convert_string, convert_string,
-                convert_string, convert_string, convert_string, convert_string);
-        ptr = kernel_string;
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            (const char **)&ptr, "test_kernel");
-        test_error(error, "create single kernel failed");
-        // Set the arguments
-        for (count = 0; count < 7; count++)
-        {
-            switch (count)
-            {
-                case 0:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_char) * sizes[size_to_test],
-                        &c);
-                    break;
-                case 1:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_uchar) * sizes[size_to_test],
-                        &uc);
-                    break;
-                case 2:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_short) * sizes[size_to_test],
-                        &s);
-                    break;
-                case 3:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_ushort) * sizes[size_to_test],
-                        &us);
-                    break;
-                case 4:
-                    error = clSetKernelArg(kernel, count,
-                                           sizeof(cl_int) * sizes[size_to_test],
-                                           &i);
-                    break;
-                case 5:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_uint) * sizes[size_to_test],
-                        &ui);
-                    break;
-                case 6:
-                    error = clSetKernelArg(
-                        kernel, count, sizeof(cl_float) * sizes[size_to_test],
-                        &f);
-                    break;
-                default: log_error("Test error"); break;
-            }
-            if (error)
-                log_error("Setting kernel arg %d %s%s: ", count, types[count],
-                          size_strings[size_to_test]);
-            test_error(error, "clSetKernelArgs failed");
-        }
-        error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results);
-        test_error(error, "clSetKernelArgs failed");
-        // Execute
-        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0,
-                                       NULL, NULL);
-        test_error(error, "clEnqueueNDRangeKernel failed");
-        // Read back the results
-        error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0,
-                                    sizeof(cl_float) * 7 * 16, results_back, 0,
-                                    NULL, NULL);
-        test_error(error, "clEnqueueReadBuffer failed");
-        // Verify the results
-        for (count = 0; count < 7; count++)
-        {
-            for (index = 0; index < sizes[size_to_test]; index++)
-            {
-                switch (count)
-                {
-                    case 0: expected = (float)c[index]; break;
-                    case 1: expected = (float)uc[index]; break;
-                    case 2: expected = (float)s[index]; break;
-                    case 3: expected = (float)us[index]; break;
-                    case 4: expected = (float)i[index]; break;
-                    case 5: expected = (float)ui[index]; break;
-                    case 6: expected = (float)f[index]; break;
-                    default: log_error("Test error"); break;
-                }
-                if (results_back[count * sizes[size_to_test] + index]
-                    != expected)
-                {
-                    total_errors++;
-                    log_error("Conversion from %s%s failed: index %d got %g, "
-                              "expected %g.\n",
-                              types[count], size_strings[size_to_test], index,
-                              results_back[count * sizes[size_to_test] + index],
-                              expected);
-                }
-            }
-        }
+    size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem);
+    if (total_parameter_size > max_parameter_size) {
+      log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
+               (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size);
+      continue;
-    if (gHasLong)
-    {
-        log_info("Testing long types...\n");
-        total_errors +=
-            test_parameter_types_long(device, context, queue, num_elements);
-    }
-    else
-    {
-        log_info("Longs unsupported, skipping.");
+    log_info("Testing vector size %d\n", sizes[size_to_test]);
+    // If size is > 1, then we need a explicit convert call.
+    if (sizes[size_to_test] > 1) {
+      sprintf(convert_string, "convert_float%s",  size_strings[size_to_test]);
+    } else {
+      sprintf(convert_string, " ");
-    return total_errors;
+    // Build the kernel
+    sprintf(kernel_string, kernel_code,
+            size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
+            size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
+            size_strings[size_to_test], size_strings[size_to_test],
+            convert_string, convert_string, convert_string,
+            convert_string, convert_string, convert_string
+    );
+    ptr = kernel_string;
+    error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel");
+    test_error(error, "create single kernel failed");
+    // Set the arguments
+    for (count = 0; count < 7; count++) {
+      switch (count) {
+        case 0: error = clSetKernelArg(kernel, count, sizeof(cl_char)*sizes[size_to_test], &c); break;
+        case 1: error = clSetKernelArg(kernel, count, sizeof(cl_uchar)*sizes[size_to_test], &uc); break;
+        case 2: error = clSetKernelArg(kernel, count, sizeof(cl_short)*sizes[size_to_test], &s); break;
+        case 3: error = clSetKernelArg(kernel, count, sizeof(cl_ushort)*sizes[size_to_test], &us); break;
+        case 4: error = clSetKernelArg(kernel, count, sizeof(cl_int)*sizes[size_to_test], &i); break;
+        case 5: error = clSetKernelArg(kernel, count, sizeof(cl_uint)*sizes[size_to_test], &ui); break;
+        case 6: error = clSetKernelArg(kernel, count, sizeof(cl_float)*sizes[size_to_test], &f); break;
+        default: log_error("Test error"); break;
+      }
+      if (error)
+        log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]);
+      test_error(error, "clSetKernelArgs failed");
+    }
+    error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results);
+    test_error(error, "clSetKernelArgs failed");
+    // Execute
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
+    test_error(error, "clEnqueueNDRangeKernel failed");
+    // Read back the results
+    error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*7*16, results_back, 0, NULL, NULL);
+    test_error(error, "clEnqueueReadBuffer failed");
+    // Verify the results
+    for (count = 0; count < 7; count++) {
+      for (index=0; index < sizes[size_to_test]; index++) {
+        switch (count) {
+          case 0: expected = (float)c[index]; break;
+          case 1: expected = (float)uc[index]; break;
+          case 2: expected = (float)s[index]; break;
+          case 3: expected = (float)us[index]; break;
+          case 4: expected = (float)i[index]; break;
+          case 5: expected = (float)ui[index]; break;
+          case 6: expected = (float)f[index]; break;
+          default: log_error("Test error"); break;
+        }
+        if (results_back[count*sizes[size_to_test]+index] != expected) {
+          total_errors++;
+          log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test],
+                    index, results_back[count*sizes[size_to_test]+index], expected);
+        }
+      }
+    }
+  }
+  if (gHasLong) {
+    log_info("Testing long types...\n");
+    total_errors += test_parameter_types_long( device, context, queue, num_elements );
+  }
+  else {
+    log_info("Longs unsupported, skipping.");
+  }
+  return total_errors;
diff --git a/test_conformance/basic/test_constant.cpp b/test_conformance/basic/test_constant.cpp
index ed25c6e..2bd661a 100644
--- a/test_conformance/basic/test_constant.cpp
+++ b/test_conformance/basic/test_constant.cpp
@@ -140,22 +140,19 @@
     tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values);
     tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values);
     out  = (cl_float*)malloc(sizeof(cl_float) * constant_values);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * constant_values, NULL, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * constant_values, NULL, NULL);
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * constant_values, NULL, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * constant_values, NULL, NULL);
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * constant_values, NULL, NULL);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * constant_values, NULL, NULL);
     if (!streams[2])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/basic/test_enqueued_local_size.cpp b/test_conformance/basic/test_enqueued_local_size.cpp
index f52162a..3afd22b 100644
--- a/test_conformance/basic/test_enqueued_local_size.cpp
+++ b/test_conformance/basic/test_enqueued_local_size.cpp
@@ -74,42 +74,14 @@
     size_t localsize[2];
     int err;
-    // For an OpenCL-3.0 device that does not support non-uniform work-groups
-    // we cannot enqueue local sizes which do not divide the global dimensions
-    // but we can still run the test checking that get_enqueued_local_size ==
-    // get_local_size.
-    bool use_uniform_work_groups{ false };
-    if (get_device_cl_version(device) >= Version(3, 0))
-    {
-        cl_bool areNonUniformWorkGroupsSupported = false;
-        err = clGetDeviceInfo(device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT,
-                              sizeof(areNonUniformWorkGroupsSupported),
-                              &areNonUniformWorkGroupsSupported, nullptr);
-        test_error_ret(err, "clGetDeviceInfo failed.", TEST_FAIL);
-        if (CL_FALSE == areNonUniformWorkGroupsSupported)
-        {
-            log_info("Non-uniform work group sizes are not supported, "
-                     "enqueuing with uniform workgroups\n");
-            use_uniform_work_groups = true;
-        }
-    }
     output_ptr   = (int*)malloc(2 * sizeof(int));
-    streams =
-        clCreateBuffer(context, CL_MEM_READ_WRITE, 2 * sizeof(int), NULL, &err);
+    streams = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), 2*sizeof(int), NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    std::string cl_std = "-cl-std=CL";
-    cl_std += (get_device_cl_version(device) == Version(3, 0)) ? "3.0" : "2.0";
-    err = create_single_kernel_helper_with_build_options(
-        context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code,
-        "test_enqueued_local_size_1d", cl_std.c_str());
+    err = create_single_kernel_helper_with_build_options(context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code, "test_enqueued_local_size_1d", "-cl-std=CL2.0");
     test_error( err, "create_single_kernel_helper failed");
-    err = create_single_kernel_helper_with_build_options(
-        context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code,
-        "test_enqueued_local_size_2d", cl_std.c_str());
+    err = create_single_kernel_helper_with_build_options(context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code, "test_enqueued_local_size_2d", "-cl-std=CL2.0");
     test_error( err, "create_single_kernel_helper failed");
     err  = clSetKernelArg(kernel[0], 0, sizeof streams, &streams);
@@ -126,20 +98,6 @@
     localsize[0] = MIN(16, max_wgs);
     localsize[1] = MIN(11, max_wgs / localsize[0]);
-    // If we need to use uniform workgroups because non-uniform workgroups are
-    // not supported, round up to the next global size that is divisible by the
-    // local size.
-    if (use_uniform_work_groups)
-    {
-        if (globalsize[0] % localsize[0])
-        {
-            globalsize[0] += (localsize[0] - (globalsize[0] % localsize[0]));
-        }
-        if (globalsize[1] % localsize[1])
-        {
-            globalsize[1] += (localsize[1] - (globalsize[1] % localsize[1]));
-        }
-    }
     err = clEnqueueNDRangeKernel(queue, kernel[1], 2, NULL, globalsize, localsize, 0, NULL, NULL);
     test_error( err, "clEnqueueNDRangeKernel failed.");
@@ -151,10 +109,6 @@
     globalsize[0] = (size_t)num_elements;
     localsize[0] = 9;
-    if (use_uniform_work_groups && (globalsize[0] % localsize[0]))
-    {
-        globalsize[0] += (localsize[0] - (globalsize[0] % localsize[0]));
-    }
     err = clEnqueueNDRangeKernel(queue, kernel[1], 1, NULL, globalsize, localsize, 0, NULL, NULL);
     test_error( err, "clEnqueueNDRangeKernel failed.");
diff --git a/test_conformance/basic/test_explicit_s2v.cpp b/test_conformance/basic/test_explicit_s2v.cpp
index bf38636..34e7da9 100644
--- a/test_conformance/basic/test_explicit_s2v.cpp
+++ b/test_conformance/basic/test_explicit_s2v.cpp
@@ -146,11 +146,9 @@
         return -1;
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                paramSize * count, inputData, &error);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), paramSize * count, inputData, &error);
     test_error( error, "clCreateBuffer failed");
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, destStride * count,
-                                NULL, &error);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  destStride * count, NULL, &error);
     test_error( error, "clCreateBuffer failed");
     /* Set the arguments */
diff --git a/test_conformance/basic/test_float2int.cpp b/test_conformance/basic/test_float2int.cpp
index 4063a95..b6af322 100644
--- a/test_conformance/basic/test_float2int.cpp
+++ b/test_conformance/basic/test_float2int.cpp
@@ -69,15 +69,13 @@
     input_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL);
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL);
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/basic/test_fpmath_float.cpp b/test_conformance/basic/test_fpmath_float.cpp
index 6e5deb4..4ed81cc 100644
--- a/test_conformance/basic/test_fpmath_float.cpp
+++ b/test_conformance/basic/test_fpmath_float.cpp
@@ -161,13 +161,13 @@
     input_ptr[2] = (cl_float*)malloc(length);
     output_ptr   = (cl_float*)malloc(length);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
     p = input_ptr[0];
diff --git a/test_conformance/basic/test_fpmath_float2.cpp b/test_conformance/basic/test_fpmath_float2.cpp
index 1881b4b..a964f6a 100644
--- a/test_conformance/basic/test_fpmath_float2.cpp
+++ b/test_conformance/basic/test_fpmath_float2.cpp
@@ -160,13 +160,13 @@
     input_ptr[2] = (cl_float*)malloc(length);
     output_ptr   = (cl_float*)malloc(length);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
     p = input_ptr[0];
diff --git a/test_conformance/basic/test_fpmath_float4.cpp b/test_conformance/basic/test_fpmath_float4.cpp
index 999c8ec..275b4f3 100644
--- a/test_conformance/basic/test_fpmath_float4.cpp
+++ b/test_conformance/basic/test_fpmath_float4.cpp
@@ -160,13 +160,13 @@
     input_ptr[2] = (cl_float*)malloc(length);
     output_ptr   = (cl_float*)malloc(length);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
     test_error( err, "clCreateBuffer failed.");
     p = input_ptr[0];
diff --git a/test_conformance/basic/test_get_linear_ids.cpp b/test_conformance/basic/test_get_linear_ids.cpp
index 3496fd0..fba200a 100644
--- a/test_conformance/basic/test_get_linear_ids.cpp
+++ b/test_conformance/basic/test_get_linear_ids.cpp
@@ -59,8 +59,7 @@
     // Create the kernel
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        linear_ids_source, "test_linear_ids");
+    error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, linear_ids_source, "test_linear_ids", "-cl-std=CL2.0");
     if (error)
         return error;
diff --git a/test_conformance/basic/test_global_linear_id.cpp b/test_conformance/basic/test_global_linear_id.cpp
index 046d12a..7bed5b8 100644
--- a/test_conformance/basic/test_global_linear_id.cpp
+++ b/test_conformance/basic/test_global_linear_id.cpp
@@ -70,24 +70,19 @@
     cl_kernel kernel[2];
     int *output_ptr;
-    size_t threads[2];
-    int err;
-    num_elements = (int)sqrt((float)num_elements);
-    int length = num_elements * num_elements;
+      size_t threads[2];
+      int err;
+      num_elements = (int)sqrt((float)num_elements);
+      int length = num_elements * num_elements;
-    output_ptr = (int *)malloc(sizeof(int) * length);
+      output_ptr   = (int*)malloc(sizeof(int) * length);
-    streams = clCreateBuffer(context, CL_MEM_READ_WRITE, length * sizeof(int),
-                             NULL, &err);
+    streams = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length*sizeof(int), NULL, &err);
     test_error( err, "clCreateBuffer failed.");
-    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1,
-                                      &global_linear_id_1d_code,
-                                      "test_global_linear_id_1d");
+    err = create_single_kernel_helper_with_build_options(context, &program[0], &kernel[0], 1, &global_linear_id_1d_code, "test_global_linear_id_1d", "-cl-std=CL2.0");
     test_error( err, "create_single_kernel_helper failed");
-    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1,
-                                      &global_linear_id_2d_code,
-                                      "test_global_linear_id_2d");
+    err = create_single_kernel_helper_with_build_options(context, &program[1], &kernel[1], 1, &global_linear_id_2d_code, "test_global_linear_id_2d", "-cl-std=CL2.0");
     test_error( err, "create_single_kernel_helper failed");
     err  = clSetKernelArg(kernel[0], 0, sizeof streams, &streams);
diff --git a/test_conformance/basic/test_global_work_offsets.cpp b/test_conformance/basic/test_global_work_offsets.cpp
index 39b54b4..edaca38 100644
--- a/test_conformance/basic/test_global_work_offsets.cpp
+++ b/test_conformance/basic/test_global_work_offsets.cpp
@@ -138,9 +138,7 @@
     memset( outputA, 0xff, sizeof( outputA ) );
     for( int i = 0; i < 3; i++ )
-        streams[i] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                           sizeof(outputA), outputA, &error);
+        streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof(outputA), outputA, &error );
         test_error( error, "Unable to create output array" );
@@ -230,9 +228,7 @@
     // Create some output streams, and storage for a single control ID
     memset( outOffsets, 0xff, sizeof( outOffsets ) );
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                       sizeof(outOffsets), outOffsets, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof( outOffsets ), outOffsets, &error );
     test_error( error, "Unable to create control ID buffer" );
     // Run a few different times
diff --git a/test_conformance/basic/test_hostptr.cpp b/test_conformance/basic/test_hostptr.cpp
index 65af5c3..f40cb69 100644
--- a/test_conformance/basic/test_hostptr.cpp
+++ b/test_conformance/basic/test_hostptr.cpp
@@ -131,53 +131,39 @@
         make_random_data(num_elements, input_ptr[1], d);
         // Create host-side input
-        streams[0] =
-            clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
-                           sizeof(cl_float) * num_elements, input_ptr[0], &err);
+        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[0], &err);
         test_error(err, "clCreateBuffer 0 failed");
         // Create a copied input
-        streams[1] =
-            clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                           sizeof(cl_float) * num_elements, input_ptr[1], &err);
+        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[1], &err);
         test_error(err, "clCreateBuffer 1 failed");
         // Create a host-side output
-        streams[2] =
-            clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
-                           sizeof(cl_float) * num_elements, output_ptr, &err);
+        streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, output_ptr, &err);
         test_error(err, "clCreateBuffer 2 failed");
         // Create a host-side input
         img_format.image_channel_order = CL_RGBA;
         img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[3] =
-            create_image_2d(context, CL_MEM_USE_HOST_PTR, &img_format,
-                            img_width, img_height, 0, rgba8_inptr, &err);
+        streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
         test_error(err, "create_image_2d 3 failed");
         // Create a copied input
         img_format.image_channel_order = CL_RGBA;
         img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[4] =
-            create_image_2d(context, CL_MEM_COPY_HOST_PTR, &img_format,
-                            img_width, img_height, 0, rgba8_inptr, &err);
+        streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
         test_error(err, "create_image_2d 4 failed");
         // Create a host-side output
         img_format.image_channel_order = CL_RGBA;
         img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[5] =
-            create_image_2d(context, CL_MEM_USE_HOST_PTR, &img_format,
-                            img_width, img_height, 0, rgba8_outptr, &err);
+        streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
         test_error(err, "create_image_2d 5 failed");
         // Create a copied output
         img_format.image_channel_data_type = CL_RGBA;
         img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[6] =
-            create_image_2d(context, CL_MEM_COPY_HOST_PTR, &img_format,
-                            img_width, img_height, 0, rgba8_outptr, &err);
+        streams[6] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
         test_error(err, "create_image_2d 6 failed");
         err = create_single_kernel_helper(context, &program, &kernel,1, &hostptr_kernel_code, "test_hostptr" );
diff --git a/test_conformance/basic/test_image_multipass.cpp b/test_conformance/basic/test_image_multipass.cpp
index 7f51665..cd91a13 100644
--- a/test_conformance/basic/test_image_multipass.cpp
+++ b/test_conformance/basic/test_image_multipass.cpp
@@ -172,7 +172,7 @@
         cl_mem_flags        flags;
         initial_data = generate_initial_byte_image(img_width, img_height, 4, 0xF0);
-        flags = CL_MEM_READ_WRITE;
+        flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
         accum_streams[0] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
         if (!accum_streams[0])
@@ -224,7 +224,7 @@
         MTdata              d;
         input_streams = (cl_mem*)malloc(sizeof(cl_mem) * num_input_streams);
-        flags = CL_MEM_READ_WRITE;
+        flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
         int i;
         d = init_genrand( gRandomSeed );
@@ -425,7 +425,7 @@
         cl_mem_flags        flags;
         initial_data = generate_initial_byte_image(img_width, img_height, 4, 0xF0);
-        flags = CL_MEM_READ_WRITE;
+        flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
         accum_streams[0] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
         if (!accum_streams[0])
@@ -469,7 +469,7 @@
         MTdata              d;
         input_streams = (cl_mem*)malloc(sizeof(cl_mem) * num_input_streams);
-        flags = CL_MEM_READ_WRITE;
+        flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
         int i;
         d = init_genrand( gRandomSeed );
diff --git a/test_conformance/basic/test_image_r8.cpp b/test_conformance/basic/test_image_r8.cpp
index b633d6a..7805c1b 100644
--- a/test_conformance/basic/test_image_r8.cpp
+++ b/test_conformance/basic/test_image_r8.cpp
@@ -88,9 +88,7 @@
     img_format.image_channel_data_type = CL_UNSIGNED_INT8;
     // early out if this image type is not supported
-    if (!is_image_format_supported(context, CL_MEM_READ_ONLY,
-                                   CL_MEM_OBJECT_IMAGE2D, &img_format))
-    {
+    if( ! is_image_format_supported( context, (cl_mem_flags)(CL_MEM_READ_ONLY), CL_MEM_OBJECT_IMAGE2D, &img_format ) ) {
         log_info("WARNING: Image type not supported; skipping test.\n");
         return 0;
@@ -100,17 +98,14 @@
     free_mtdata(d); d = NULL;
     output_ptr = (cl_uchar*)malloc(sizeof(cl_uchar) * img_width * img_height);
-    streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &img_format,
-                                 img_width, img_height, 0, NULL, NULL);
+    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_ONLY), &img_format, img_width, img_height, 0, NULL, NULL);
     if (!streams[0])
         log_error("create_image_2d failed\n");
         return -1;
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_uchar) * img_width * img_height, NULL, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uchar) * img_width*img_height, NULL, NULL);
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/basic/test_imagearraycopy.cpp b/test_conformance/basic/test_imagearraycopy.cpp
index 0246d80..4240466 100644
--- a/test_conformance/basic/test_imagearraycopy.cpp
+++ b/test_conformance/basic/test_imagearraycopy.cpp
@@ -38,8 +38,7 @@
   log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
-  image = create_image_2d(context, CL_MEM_READ_WRITE, format, img_width,
-                          img_height, 0, NULL, &err);
+  image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err);
   test_error(err, "create_image_2d failed");
   err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
@@ -47,7 +46,7 @@
   buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;
-  buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, &err);
+  buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  buffer_size, NULL, &err);
   test_error(err, "clCreateBuffer failed");
   d = init_genrand( gRandomSeed );
diff --git a/test_conformance/basic/test_imagearraycopy3d.cpp b/test_conformance/basic/test_imagearraycopy3d.cpp
index 19dfdbc..e34aa7d 100644
--- a/test_conformance/basic/test_imagearraycopy3d.cpp
+++ b/test_conformance/basic/test_imagearraycopy3d.cpp
@@ -38,8 +38,7 @@
   log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
-  image = create_image_3d(context, CL_MEM_READ_ONLY, format, img_width,
-                          img_height, img_depth, 0, 0, NULL, &err);
+  image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err);
   test_error(err, "create_image_3d failed");
   err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
@@ -47,7 +46,7 @@
   buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
-  buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, &err);
+  buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  buffer_size, NULL, &err);
   test_error(err, "clCreateBuffer failed");
   d = init_genrand( gRandomSeed );
@@ -122,15 +121,12 @@
-  err = clGetSupportedImageFormats(
-      context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
+  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
   test_error(err, "clGetSupportedImageFormats failed");
   formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
-  err = clGetSupportedImageFormats(context, CL_MEM_READ_ONLY,
-                                   CL_MEM_OBJECT_IMAGE3D, num_formats, formats,
-                                   NULL);
+  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL);
   test_error(err, "clGetSupportedImageFormats failed");
   for (i = 0; i < num_formats; i++) {
diff --git a/test_conformance/basic/test_imagecopy.cpp b/test_conformance/basic/test_imagecopy.cpp
index bcb9fef..e74827d 100644
--- a/test_conformance/basic/test_imagecopy.cpp
+++ b/test_conformance/basic/test_imagecopy.cpp
@@ -132,29 +132,23 @@
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
-    streams[1] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[2] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
-    streams[3] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_FLOAT;
-    streams[4] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
-    streams[5] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
     for (i=0; i<3; i++)
diff --git a/test_conformance/basic/test_imagedim.cpp b/test_conformance/basic/test_imagedim.cpp
index 008c88b..6d8cdb3 100644
--- a/test_conformance/basic/test_imagedim.cpp
+++ b/test_conformance/basic/test_imagedim.cpp
@@ -38,25 +38,24 @@
-static unsigned char *generate_8888_image(size_t w, size_t h, MTdata d)
+static unsigned char *
+generate_8888_image(int w, int h, MTdata d)
-    unsigned char *ptr = new unsigned char[4 * w * h];
-    size_t i;
+    unsigned char   *ptr = (unsigned char*)malloc(w * h * 4);
+    int             i;
-    for (i = 0; i < w * h * 4; i++)
-    {
+    for (i=0; i<w*h*4; i++)
         ptr[i] = (unsigned char)genrand_int32(d);
-    }
     return ptr;
-static int verify_8888_image(unsigned char *image, unsigned char *outptr,
-                             size_t w, size_t h)
+static int
+verify_8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
-    size_t i;
+    int     i;
-    for (i = 0; i < w * h; i++)
+    for (i=0; i<w*h; i++)
         if (outptr[i] != image[i])
             return -1;
@@ -69,18 +68,18 @@
 test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-    cl_mem streams[2];
-    cl_image_format img_format;
-    unsigned char *input_ptr, *output_ptr;
-    cl_program program;
-    cl_kernel kernel;
-    size_t threads[2];
-    cl_ulong max_mem_size;
-    size_t img_width, max_img_width;
-    size_t img_height, max_img_height;
-    size_t max_img_dim;
-    int i, j, i2, j2, err = 0;
-    size_t max_image2d_width, max_image2d_height;
+    cl_mem            streams[2];
+    cl_image_format    img_format;
+    unsigned char    *input_ptr, *output_ptr;
+    cl_program        program;
+    cl_kernel        kernel;
+    size_t    threads[2];
+     cl_ulong    max_mem_size;
+    int                img_width, max_img_width;
+    int                img_height, max_img_height;
+    int                max_img_dim;
+    int                i, j, i2, j2, err=0;
+    size_t            max_image2d_width, max_image2d_height;
     int total_errors = 0;
     MTdata  d;
@@ -121,15 +120,15 @@
     cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
     test_error(err, "clCreateSampler failed");
-    max_img_width = max_image2d_width;
-    max_img_height = max_image2d_height;
+    max_img_width = (int)max_image2d_width;
+    max_img_height = (int)max_image2d_height;
     // determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel,
   //  and we want to consume 1/4 of global memory (this is the minimum required to be
   //  supported by the spec)
     max_mem_size /= 4; // use 1/4
     max_mem_size /= 4; // 4 bytes per pixel
-    max_img_dim = (size_t)sqrt((double)max_mem_size);
+    max_img_dim = (int)sqrt((double)max_mem_size);
     // convert to a power of 2
         unsigned int    n = (unsigned int)max_img_dim;
@@ -139,7 +138,7 @@
         while (m > n)
             m >>= 1;
-        max_img_dim = m;
+        max_img_dim = (int)m;
     if (max_img_width > max_img_dim)
@@ -152,41 +151,36 @@
     d = init_genrand( gRandomSeed );
     input_ptr = generate_8888_image(max_img_width, max_img_height, d);
-    output_ptr = new unsigned char[4 * max_img_width * max_img_height];
+    output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height);
     // test power of 2 width, height starting at 1 to 4K
-    for (i = 1, i2 = 0; i <= max_img_height; i <<= 1, i2++)
+    for (i=1,i2=0; i<=max_img_height; i<<=1,i2++)
         img_height = (1 << i2);
-        for (j = 1, j2 = 0; j <= max_img_width; j <<= 1, j2++)
+        for (j=1,j2=0; j<=max_img_width; j<<=1,j2++)
             img_width = (1 << j2);
             img_format.image_channel_order = CL_RGBA;
             img_format.image_channel_data_type = CL_UNORM_INT8;
-            streams[0] =
-                create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                img_width, img_height, 0, NULL, NULL);
+            streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, NULL);
             if (!streams[0])
                 log_error("create_image_2d failed.  width = %d, height = %d\n", img_width, img_height);
-                delete[] input_ptr;
-                delete[] output_ptr;
+                free(input_ptr);
+                free(output_ptr);
                 return -1;
             img_format.image_channel_order = CL_RGBA;
             img_format.image_channel_data_type = CL_UNORM_INT8;
-            streams[1] =
-                create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                img_width, img_height, 0, NULL, NULL);
+            streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, NULL);
             if (!streams[1])
                 log_error("create_image_2d failed.  width = %d, height = %d\n", img_width, img_height);
-                delete[] input_ptr;
-                delete[] output_ptr;
+                free(input_ptr);
+                free(output_ptr);
                 return -1;
@@ -199,8 +193,8 @@
                 log_error("clWriteImage failed\n");
-                delete[] input_ptr;
-                delete[] output_ptr;
+                free(input_ptr);
+                free(output_ptr);
                 return -1;
@@ -213,8 +207,8 @@
                 log_error("clSetKernelArgs failed\n");
-                delete[] input_ptr;
-                delete[] output_ptr;
+                free(input_ptr);
+                free(output_ptr);
                 return -1;
@@ -230,8 +224,8 @@
                             img_width, img_height);
-                delete[] input_ptr;
-                delete[] output_ptr;
+                free(input_ptr);
+                free(output_ptr);
                 return -1;
@@ -243,8 +237,8 @@
                             img_width, img_height);
-                delete[] input_ptr;
-                delete[] output_ptr;
+                free(input_ptr);
+                free(output_ptr);
                 return -1;
@@ -261,8 +255,8 @@
     // cleanup
-    delete[] input_ptr;
-    delete[] output_ptr;
+    free(input_ptr);
+    free(output_ptr);
@@ -276,18 +270,18 @@
 test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-    cl_mem streams[2];
-    cl_image_format img_format;
-    unsigned char *input_ptr, *output_ptr;
-    cl_program program;
-    cl_kernel kernel;
-    size_t threads[2], local_threads[2];
-    cl_ulong max_mem_size;
-    size_t img_width, max_img_width;
-    size_t img_height, max_img_height;
-    size_t max_img_dim;
-    int i, j, i2, j2, err = 0;
-    size_t max_image2d_width, max_image2d_height;
+    cl_mem            streams[2];
+    cl_image_format    img_format;
+    unsigned char    *input_ptr, *output_ptr;
+    cl_program        program;
+    cl_kernel        kernel;
+    size_t    threads[2], local_threads[2];
+    cl_ulong    max_mem_size;
+    int                img_width, max_img_width;
+    int                img_height, max_img_height;
+    int                max_img_dim;
+    int                i, j, i2, j2, err=0;
+    size_t            max_image2d_width, max_image2d_height;
     int total_errors = 0;
     size_t max_local_workgroup_size[3];
     MTdata d;
@@ -367,10 +361,10 @@
     d = init_genrand( gRandomSeed );
     input_ptr = generate_8888_image(max_img_width, max_img_height, d);
-    output_ptr = new unsigned char[4 * max_img_width * max_img_height];
+    output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height);
     int plus_minus;
-    for (plus_minus = 0; plus_minus < 3; plus_minus++)
+    for (plus_minus=0; plus_minus < 3; plus_minus++)
     // test power of 2 width, height starting at 1 to 4K
@@ -381,8 +375,8 @@
                 img_width = (1 << j2);
-                size_t effective_img_height = img_height;
-                size_t effective_img_width = img_width;
+                int effective_img_height = img_height;
+                int effective_img_width = img_width;
                 local_threads[0] = 1;
                 local_threads[1] = 1;
@@ -410,28 +404,24 @@
                 img_format.image_channel_order = CL_RGBA;
                 img_format.image_channel_data_type = CL_UNORM_INT8;
-                streams[0] = create_image_2d(
-                    context, CL_MEM_READ_WRITE, &img_format,
-                    effective_img_width, effective_img_height, 0, NULL, NULL);
+                streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, effective_img_width, effective_img_height, 0, NULL, NULL);
                 if (!streams[0])
                     log_error("create_image_2d failed.  width = %d, height = %d\n", effective_img_width, effective_img_height);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
+                    free(input_ptr);
+                    free(output_ptr);
                     return -1;
                 img_format.image_channel_order = CL_RGBA;
                 img_format.image_channel_data_type = CL_UNORM_INT8;
-                streams[1] = create_image_2d(
-                    context, CL_MEM_READ_WRITE, &img_format,
-                    effective_img_width, effective_img_height, 0, NULL, NULL);
+                streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, effective_img_width, effective_img_height, 0, NULL, NULL);
                 if (!streams[1])
                     log_error("create_image_2d failed.  width = %d, height = %d\n", effective_img_width, effective_img_height);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
+                    free(input_ptr);
+                    free(output_ptr);
                     return -1;
@@ -444,8 +434,8 @@
                     log_error("clWriteImage failed\n");
-                    delete[] input_ptr;
-                    delete[] output_ptr;
+                    free(input_ptr);
+                    free(output_ptr);
                     return -1;
@@ -458,8 +448,8 @@
                     log_error("clSetKernelArgs failed\n");
-                    delete[] input_ptr;
-                    delete[] output_ptr;
+                    free(input_ptr);
+                    free(output_ptr);
                     return -1;
@@ -476,8 +466,8 @@
                                 effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
+                    free(input_ptr);
+                    free(output_ptr);
                     return -1;
@@ -489,8 +479,8 @@
                                 effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
+                    free(input_ptr);
+                    free(output_ptr);
                     return -1;
@@ -508,15 +498,15 @@
-  // cleanup
-  delete[] input_ptr;
-  delete[] output_ptr;
-  free_mtdata(d);
-  clReleaseSampler(sampler);
-  clReleaseKernel(kernel);
-  clReleaseProgram(program);
+    // cleanup
+    free(input_ptr);
+    free(output_ptr);
+    free_mtdata(d);
+    clReleaseSampler(sampler);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
-  return total_errors;
+    return total_errors;
diff --git a/test_conformance/basic/test_imagenpot.cpp b/test_conformance/basic/test_imagenpot.cpp
index baa5b2e..4713c30 100644
--- a/test_conformance/basic/test_imagenpot.cpp
+++ b/test_conformance/basic/test_imagenpot.cpp
@@ -110,8 +110,8 @@
         img_format.image_channel_order = CL_RGBA;
         img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                     img_width, img_height, 0, NULL, NULL);
+        streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format,
+                                 img_width, img_height, 0, NULL, NULL);
         if (!streams[0])
             log_error("create_image_2d failed\n");
@@ -120,8 +120,8 @@
         img_format.image_channel_order = CL_RGBA;
         img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[1] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                     img_width, img_height, 0, NULL, NULL);
+    streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format,
+                                 img_width, img_height, 0, NULL, NULL);
         if (!streams[1])
             log_error("create_image_2d failed\n");
diff --git a/test_conformance/basic/test_imagerandomcopy.cpp b/test_conformance/basic/test_imagerandomcopy.cpp
index c3355de..494d6c2 100644
--- a/test_conformance/basic/test_imagerandomcopy.cpp
+++ b/test_conformance/basic/test_imagerandomcopy.cpp
@@ -146,29 +146,23 @@
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
-    streams[1] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[2] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
-    streams[3] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_FLOAT;
-    streams[4] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
-    streams[5] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
     for (i=0; i<3; i++)
diff --git a/test_conformance/basic/test_imagereadwrite.cpp b/test_conformance/basic/test_imagereadwrite.cpp
index c074238..dd1923e 100644
--- a/test_conformance/basic/test_imagereadwrite.cpp
+++ b/test_conformance/basic/test_imagereadwrite.cpp
@@ -215,20 +215,17 @@
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[1] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_FLOAT;
-    streams[2] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, &err);
+    streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
     test_error(err, "create_image_2d failed");
     for (i=0; i<3; i++)
diff --git a/test_conformance/basic/test_int2float.cpp b/test_conformance/basic/test_int2float.cpp
index 483698a..d298dc7 100644
--- a/test_conformance/basic/test_int2float.cpp
+++ b/test_conformance/basic/test_int2float.cpp
@@ -68,15 +68,13 @@
     input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL);
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL);
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/basic/test_local_linear_id.cpp b/test_conformance/basic/test_local_linear_id.cpp
index 279bd71..cdf5ee6 100644
--- a/test_conformance/basic/test_local_linear_id.cpp
+++ b/test_conformance/basic/test_local_linear_id.cpp
@@ -66,29 +66,24 @@
 test_local_linear_id(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-    cl_mem streams;
-    cl_program program[2];
-    cl_kernel kernel[2];
+      cl_mem streams;
+      cl_program program[2];
+      cl_kernel kernel[2];
     int *output_ptr;
-    size_t threads[2];
-    int err;
-    num_elements = (int)sqrt((float)num_elements);
-    int length = num_elements * num_elements;
+      size_t threads[2];
+      int err;
+      num_elements = (int)sqrt((float)num_elements);
+      int length = num_elements * num_elements;
-    output_ptr = (cl_int *)malloc(sizeof(int) * length);
+      output_ptr   = (cl_int*)malloc(sizeof(int) * length);
-    streams = clCreateBuffer(context, CL_MEM_READ_WRITE, length * sizeof(int),
-                             NULL, &err);
-    test_error(err, "clCreateBuffer failed.");
+    streams = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length*sizeof(int), NULL, &err);
+    test_error( err, "clCreateBuffer failed.");
-    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1,
-                                      &local_linear_id_1d_code,
-                                      "test_local_linear_id_1d");
+    err = create_single_kernel_helper_with_build_options(context, &program[0], &kernel[0], 1, &local_linear_id_1d_code, "test_local_linear_id_1d", "-cl-std=CL2.0");
     test_error( err, "create_single_kernel_helper failed");
-    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1,
-                                      &local_linear_id_2d_code,
-                                      "test_local_linear_id_2d");
+    err = create_single_kernel_helper_with_build_options(context, &program[1], &kernel[1], 1, &local_linear_id_2d_code, "test_local_linear_id_2d", "-cl-std=CL2.0");
     test_error( err, "create_single_kernel_helper failed");
     err  = clSetKernelArg(kernel[0], 0, sizeof streams, &streams);
diff --git a/test_conformance/basic/test_multireadimagemultifmt.cpp b/test_conformance/basic/test_multireadimagemultifmt.cpp
index 7fe58d3..5c93d2f 100644
--- a/test_conformance/basic/test_multireadimagemultifmt.cpp
+++ b/test_conformance/basic/test_multireadimagemultifmt.cpp
@@ -136,8 +136,7 @@
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, NULL);
+    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, NULL);
     if (!streams[0])
         log_error("create_image_2d failed\n");
@@ -145,8 +144,7 @@
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[1] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, NULL);
+    streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, NULL);
     if (!streams[1])
         log_error("create_image_2d failed\n");
@@ -154,17 +152,14 @@
     img_format.image_channel_order = CL_RGBA;
     img_format.image_channel_data_type = CL_FLOAT;
-    streams[2] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                 img_width, img_height, 0, NULL, NULL);
+    streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, NULL);
     if (!streams[2])
         log_error("create_image_2d failed\n");
         return -1;
-    streams[3] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(float) * 4 * img_width * img_height, NULL, NULL);
+    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(float)*4 * img_width*img_height, NULL, NULL);
     if (!streams[3])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/basic/test_numeric_constants.cpp b/test_conformance/basic/test_numeric_constants.cpp
index 83687ee..5aeca0e 100644
--- a/test_conformance/basic/test_numeric_constants.cpp
+++ b/test_conformance/basic/test_numeric_constants.cpp
@@ -242,14 +242,11 @@
     /* Create some I/O streams */
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float_out),
-                                NULL, &error);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(float_out), NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int_out),
-                                NULL, &error);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(int_out), NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(uint_out),
-                                NULL, &error);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(uint_out), NULL, &error);
     test_error( error, "Creating test array failed" );
     error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
@@ -351,11 +348,9 @@
             return -1;
-        streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    sizeof(long_out), NULL, &error);
+        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(long_out), NULL, &error);
         test_error( error, "Creating test array failed" );
-        streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    sizeof(ulong_out), NULL, &error);
+        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(ulong_out), NULL, &error);
         test_error( error, "Creating test array failed" );
         error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
@@ -394,11 +389,9 @@
             return -1;
-        streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    sizeof(double_out), NULL, &error);
+        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(double_out), NULL, &error);
         test_error( error, "Creating test array failed" );
-        streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    sizeof(long_out), NULL, &error);
+        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(long_out), NULL, &error);
         test_error( error, "Creating test array failed" );
         error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
@@ -540,11 +533,9 @@
     /* Create some I/O streams */
-    intStream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(intOut), NULL,
-                               &error);
+    intStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(intOut), NULL, &error );
     test_error( error, "Creating test array failed" );
-    floatStream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(floatOut),
-                                 NULL, &error);
+    floatStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(floatOut), NULL, &error );
     test_error( error, "Creating test array failed" );
     // Stage 1: basic limits on MAXFLOAT
@@ -686,8 +677,7 @@
                 return -1;
-            doubleStream = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                          sizeof(doubleOut), NULL, &error);
+            doubleStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(doubleOut), NULL, &error );
             test_error( error, "Creating test array failed" );
             error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream );
diff --git a/test_conformance/basic/test_preprocessors.cpp b/test_conformance/basic/test_preprocessors.cpp
index 2038d15..332f99d 100644
--- a/test_conformance/basic/test_preprocessors.cpp
+++ b/test_conformance/basic/test_preprocessors.cpp
@@ -125,14 +125,11 @@
     /* Create some I/O streams */
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(results),
-                                NULL, &error);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(results), NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(fileString),
-                                NULL, &error);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(fileString), NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(roundingString), NULL, &error);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(roundingString), NULL, &error);
     test_error( error, "Creating test array failed" );
     // Set up and run
@@ -216,15 +213,33 @@
     // The OpenCL version reported by the macro reports the feature level supported by the compiler. Since
     // this doesn't directly match any property we can query, we just check to see if it's a sane value
-    auto device_cl_version = get_device_cl_version(deviceID);
-    int device_cl_version_int = device_cl_version.to_int() * 10;
-    if ((results[2] < 100) || (results[2] > device_cl_version_int))
+    char versionBuffer[ 128 ];
+    error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( versionBuffer ), versionBuffer, NULL );
+    test_error( error, "Unable to get device's version to validate against" );
+    // We need to parse to get the version number to compare against
+    char *p1, *p2, *p3;
+    for( p1 = versionBuffer; ( *p1 != 0 ) && !isdigit( *p1 ); p1++ )
+        ;
+    for( p2 = p1; ( *p2 != 0 ) && ( *p2 != '.' ); p2++ )
+        ;
+    for( p3 = p2; ( *p3 != 0 ) && ( *p3 != ' ' ); p3++ )
+        ;
+    if( p2 == p3 )
-        log_error("ERROR: Kernel preprocessor __OPENCL_VERSION__ does not make "
-                  "sense w.r.t. device's version string! "
-                  "(preprocessor states %d, CL_DEVICE_VERSION is %d (%s))\n",
-                  results[2], device_cl_version_int,
-                  device_cl_version.to_string().c_str());
+        log_error( "ERROR: Unable to verify OpenCL version string (platform string is incorrect format)\n" );
+        return -1;
+    }
+    *p2 = 0;
+    *p3 = 0;
+    int major = atoi( p1 );
+    int minor = atoi( p2 + 1 );
+    int realVersion = ( major * 100 ) + ( minor * 10 );
+    if( ( results[ 2 ] < 100 ) || ( results[ 2 ] > realVersion ) )
+    {
+        log_error( "ERROR: Kernel preprocessor __OPENCL_VERSION__ does not make sense w.r.t. device's version string! "
+                  "(preprocessor states %d, real version is %d (%d.%d))\n", results[ 2 ], realVersion, major, minor );
         return -1;
@@ -235,29 +250,33 @@
         return -1;
-    // The OpenCL C version reported by the macro reports the OpenCL C version
-    // specified to the compiler. We need to see whether it is supported.
-    int cl_c_major_version = results[3] / 100;
-    int cl_c_minor_version = (results[3] / 10) % 10;
-    if ((results[3] < 100)
-        || (!device_supports_cl_c_version(
-            deviceID, Version{ cl_c_major_version, cl_c_minor_version })))
+    // The __OPENCL_C_VERSION__ macro reports the OpenCL C version supported by the compiler for this OpenCL device.
+    char cVersionBuffer[ 128 ];
+    error = clGetDeviceInfo( deviceID, CL_DEVICE_OPENCL_C_VERSION, sizeof( cVersionBuffer ), cVersionBuffer, NULL );
+    test_error( error, "Unable to get device's OpenCL C version to validate against" );
+    // We need to parse to get the version number to compare against
+    for( p1 = cVersionBuffer; ( *p1 != 0 ) && !isdigit( *p1 ); p1++ )
+        ;
+    for( p2 = p1; ( *p2 != 0 ) && ( *p2 != '.' ); p2++ )
+        ;
+    for( p3 = p2; ( *p3 != 0 ) && ( *p3 != ' ' ); p3++ )
+        ;
+    if( p2 == p3 )
-        auto device_version = get_device_cl_c_version(deviceID);
-        log_error(
-            "ERROR: Kernel preprocessor __OPENCL_C_VERSION__ does not make "
-            "sense w.r.t. device's version string! "
-            "(preprocessor states %d, CL_DEVICE_OPENCL_C_VERSION is %d (%s))\n",
-            results[3], device_version.to_int() * 10,
-            device_version.to_string().c_str());
-        log_error("This means that CL_DEVICE_OPENCL_C_VERSION < "
-                  "__OPENCL_C_VERSION__");
-        if (device_cl_version >= Version{ 3, 0 })
-        {
-            log_error(", and __OPENCL_C_VERSION__ does not appear in "
-                      "CL_DEVICE_OPENCL_C_ALL_VERSIONS");
-        }
-        log_error("\n");
+        log_error( "ERROR: Unable to verify OpenCL C version string (platform string is incorrect format)\n" );
+        return -1;
+    }
+    *p2 = 0;
+    *p3 = 0;
+    major = atoi( p1 );
+    minor = atoi( p2 + 1 );
+    realVersion = ( major * 100 ) + ( minor * 10 );
+    if( ( results[ 3 ] < 100 ) || ( results[ 3 ] > realVersion ) )
+    {
+        log_error( "ERROR: Kernel preprocessor __OPENCL_C_VERSION__ does not make sense w.r.t. device's version string! "
+                  "(preprocessor states %d, real version is %d (%d.%d))\n", results[ 3 ], realVersion, major, minor );
         return -1;
diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp
index 62c0a6b..9062a54 100644
--- a/test_conformance/basic/test_progvar.cpp
+++ b/test_conformance/basic/test_progvar.cpp
@@ -1510,29 +1510,6 @@
     return err;
-// Determines whether its valid to skip this test based on the driver version
-// and the features it optionally supports.
-// Whether the test should be skipped is writen into the out paramter skip.
-// The check returns an error code for the clDeviceInfo query.
-static cl_int should_skip(cl_device_id device, cl_bool& skip)
-    // Assume we can't skip to begin with.
-    skip = CL_FALSE;
-    // Progvar tests are already skipped for OpenCL < 2.0, so here we only need
-    // to test for 3.0 since that is when program scope global variables become
-    // optional.
-    if (get_device_cl_version(device) >= Version(3, 0))
-    {
-        size_t max_global_variable_size{};
-        test_error(clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE,
-                                   sizeof(max_global_variable_size),
-                                   &max_global_variable_size, nullptr),
-                   "clGetDeviceInfo failed");
-        skip = (max_global_variable_size != 0) ? CL_FALSE : CL_TRUE;
-    }
-    return CL_SUCCESS;
 // Global functions
@@ -1541,18 +1518,6 @@
 // Test support for variables at program scope. Miscellaneous
 int test_progvar_prog_scope_misc(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-    cl_bool skip{ CL_FALSE };
-    auto error = should_skip(device, skip);
-    if (CL_SUCCESS != error)
-    {
-        return TEST_FAIL;
-    }
-    if (skip)
-    {
-        log_info("Skipping progvar_prog_scope_misc since it is optionally not "
-                 "supported on this device\n");
-        return TEST_SKIPPED_ITSELF;
-    }
     size_t max_size = 0;
     size_t pref_size = 0;
@@ -1572,19 +1537,6 @@
 // Test support for variables at program scope. Unitialized data
 int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-    cl_bool skip{ CL_FALSE };
-    auto error = should_skip(device, skip);
-    if (CL_SUCCESS != error)
-    {
-        return TEST_FAIL;
-    }
-    if (skip)
-    {
-        log_info(
-            "Skipping progvar_prog_scope_uninit since it is optionally not "
-            "supported on this device\n");
-        return TEST_SKIPPED_ITSELF;
-    }
     size_t max_size = 0;
     size_t pref_size = 0;
@@ -1601,18 +1553,6 @@
 // Test support for variables at program scope. Initialized data.
 int test_progvar_prog_scope_init(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-    cl_bool skip{ CL_FALSE };
-    auto error = should_skip(device, skip);
-    if (CL_SUCCESS != error)
-    {
-        return TEST_FAIL;
-    }
-    if (skip)
-    {
-        log_info("Skipping progvar_prog_scope_init since it is optionally not "
-                 "supported on this device\n");
-        return TEST_SKIPPED_ITSELF;
-    }
     size_t max_size = 0;
     size_t pref_size = 0;
@@ -1630,18 +1570,6 @@
 // A simple test for support of static variables inside a kernel.
 int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-    cl_bool skip{ CL_FALSE };
-    auto error = should_skip(device, skip);
-    if (CL_SUCCESS != error)
-    {
-        return TEST_FAIL;
-    }
-    if (skip)
-    {
-        log_info("Skipping progvar_func_scope since it is optionally not "
-                 "supported on this device\n");
-        return TEST_SKIPPED_ITSELF;
-    }
     size_t max_size = 0;
     size_t pref_size = 0;
diff --git a/test_conformance/basic/test_queue_priority.cpp b/test_conformance/basic/test_queue_priority.cpp
index 57ce504..831defe 100644
--- a/test_conformance/basic/test_queue_priority.cpp
+++ b/test_conformance/basic/test_queue_priority.cpp
@@ -235,18 +235,18 @@
     oldMode = get_round();
-  input_ptr[0] = (cl_float *)malloc(length);
-  input_ptr[1] = (cl_float *)malloc(length);
-  input_ptr[2] = (cl_float *)malloc(length);
-  output_ptr = (cl_float *)malloc(length);
+    input_ptr[0] = (cl_float*)malloc(length);
+    input_ptr[1] = (cl_float*)malloc(length);
+    input_ptr[2] = (cl_float*)malloc(length);
+    output_ptr   = (cl_float*)malloc(length);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+  streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
   test_error( err, "clCreateBuffer failed.");
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+  streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
   test_error( err, "clCreateBuffer failed.");
-  streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+  streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
   test_error( err, "clCreateBuffer failed.");
-  streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+  streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
   test_error( err, "clCreateBuffer failed.");
   p = input_ptr[0];
diff --git a/test_conformance/basic/test_rw_image_access_qualifier.cpp b/test_conformance/basic/test_rw_image_access_qualifier.cpp
index 87e3f60..832ec86 100644
--- a/test_conformance/basic/test_rw_image_access_qualifier.cpp
+++ b/test_conformance/basic/test_rw_image_access_qualifier.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -43,29 +43,6 @@
 int test_rw_image_access_qualifier(cl_device_id device_id, cl_context context, cl_command_queue commands, int num_elements)
-    // This test should be skipped if images are not supported.
-    if (checkForImageSupport(device_id))
-    {
-        return TEST_SKIPPED_ITSELF;
-    }
-    // Support for read-write image arguments is required for an
-    // or 2.X device if the device supports images. In OpenCL-3.0
-    // read-write images are optional. This test is already being skipped
-    // for 1.X devices.
-    if (get_device_cl_version(device_id) >= Version(3, 0))
-    {
-        cl_uint are_rw_images_supported{};
-        test_error(
-            clGetDeviceInfo(device_id, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
-                            sizeof(are_rw_images_supported),
-                            &are_rw_images_supported, nullptr),
-            "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_IMAGE_ARGS\n");
-        if (0 == are_rw_images_supported)
-        {
-            return TEST_SKIPPED_ITSELF;
-        }
-    }
     unsigned int i;
@@ -109,8 +86,7 @@
     /* Build the program executable */
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &rw_kernel_code, "test_rw_images");
+  err = create_single_kernel_helper_with_build_options(context,&program,&kernel,1,&rw_kernel_code,"test_rw_images", "-cl-std=CL2.0");
     if (err != CL_SUCCESS || !program) {
         log_error("Error: clCreateProgramWithSource failed\n");
     return err;
@@ -121,7 +97,8 @@
     format.image_channel_data_type = CL_UNSIGNED_INT32;
     /* Create input image */
+    flags = (cl_mem_flags) (CL_MEM_READ_WRITE
+                            | CL_MEM_COPY_HOST_PTR);
     src_image = create_image_2d(context, flags, &format,
                                 size_x, size_y, 0,
                                 (void *)input, &err);
diff --git a/test_conformance/basic/test_sizeof.cpp b/test_conformance/basic/test_sizeof.cpp
index 66a6c56..fc3fd0a 100644
--- a/test_conformance/basic/test_sizeof.cpp
+++ b/test_conformance/basic/test_sizeof.cpp
@@ -49,8 +49,13 @@
         sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", nullptr);
+    bool deviceLt20 = false;
+    Version version = get_device_cl_version(device);
+    if (version < Version(2,0)) {
+        deviceLt20 = true;
+    }
+    cl_int err = create_single_kernel_helper_with_build_options(context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", deviceLt20 ? "" : "-cl-std=CL2.0");
     if( err )
         return err;
diff --git a/test_conformance/basic/test_vector_swizzle.cpp b/test_conformance/basic/test_vector_swizzle.cpp
deleted file mode 100644
index 5ab3ea4..0000000
--- a/test_conformance/basic/test_vector_swizzle.cpp
+++ /dev/null
@@ -1,699 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include <algorithm>
-#include <numeric>
-#include <string>
-#include <vector>
-#include "procs.h"
-#include "harness/testHarness.h"
-template <int N> struct TestInfo
-template <> struct TestInfo<2>
-    static const size_t vector_size = 2;
-    static constexpr const char* kernel_source_xyzw = R"CLC(
-__kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) {
-    int index = 0;
-    // lvalue swizzles
-    dst[index++].x = value.x;
-    dst[index++].y = value.x;
-    dst[index++].xy = value;
-    dst[index++].yx = value;
-    // rvalue swizzles
-    dst[index++] = value.x;
-    dst[index++] = value.y;
-    dst[index++] = value.xy;
-    dst[index++] = value.yx;
-    static constexpr const char* kernel_source_rgba = R"CLC(
-__kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) {
-    int index = 0;
-    // lvalue swizzles
-    dst[index++].r = value.r;
-    dst[index++].g = value.r;
-    dst[index++].rg = value;
-    dst[index++].gr = value;
-    // rvalue swizzles
-    dst[index++] = value.r;
-    dst[index++] = value.g;
-    dst[index++] = value.rg;
-    dst[index++] =;
-    static constexpr const char* kernel_source_sN = R"CLC(
-__kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) {
-    int index = 0;
-    // lvalue swizzles
-    dst[index++].s0 = value.s0;
-    dst[index++].s1 = value.s0;
-    dst[index++].s01 = value;
-    dst[index++].s10 = value;
-    // rvalue swizzles
-    dst[index++] = value.s0;
-    dst[index++] = value.s1;
-    dst[index++] = value.s01;
-    dst[index++] = value.s10;
-template <> struct TestInfo<3>
-    static const size_t vector_size = 4; // sizeof(vec3) is four elements
-    static constexpr const char* kernel_source_xyzw = R"CLC(
-__kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) {
-    int index = 0;
-    // lvalue swizzles
-    TYPE t;
-    t = dst[index]; t.x = value.x;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.y = value.x;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.z = value.x;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; = value;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.zyx = value;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    // rvalue swizzles
-    vstore3(value.x, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.y, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.z, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.zyx, 0, (__global BASETYPE*)(dst + index++));
-    static constexpr const char* kernel_source_rgba = R"CLC(
-__kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) {
-    int index = 0;
-    // lvalue swizzles
-    TYPE t;
-    t = dst[index]; t.r = value.r;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.g = value.r;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.b = value.r;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.rgb = value;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.bgr = value;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    // rvalue swizzles
-    vstore3(value.r, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.g, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.b, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.rgb, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.bgr, 0, (__global BASETYPE*)(dst + index++));
-    static constexpr const char* kernel_source_sN = R"CLC(
-__kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) {
-    int index = 0;
-    // lvalue swizzles
-    TYPE t;
-    t = dst[index]; t.s0 = value.s0;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.s1 = value.s0;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.s2 = value.s0;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.s012 = value;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    t = dst[index]; t.s210 = value;
-    vstore3(t, 0, (__global BASETYPE*)(dst + index++));
-    // rvalue swizzles
-    vstore3(value.s0, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.s1, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.s2, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.s012, 0, (__global BASETYPE*)(dst + index++));
-    vstore3(value.s210, 0, (__global BASETYPE*)(dst + index++));
-template <> struct TestInfo<4>
-    static const size_t vector_size = 4;
-    static constexpr const char* kernel_source_xyzw = R"CLC(
-__kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) {
-    int index = 0;
-    // lvalue swizzles
-    dst[index++].x = value.x;
-    dst[index++].y = value.x;
-    dst[index++].z = value.x;
-    dst[index++].w = value.x;
-    dst[index++].xyzw = value;
-    dst[index++].wzyx = value;
-    // rvalue swizzles
-    dst[index++] = value.x;
-    dst[index++] = value.y;
-    dst[index++] = value.z;
-    dst[index++] = value.w;
-    dst[index++] = value.xyzw;
-    dst[index++] = value.wzyx;
-    static constexpr const char* kernel_source_rgba = R"CLC(
-__kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) {
-    int index = 0;
-    // lvalue swizzles
-    dst[index++].r = value.r;
-    dst[index++].g = value.r;
-    dst[index++].b = value.r;
-    dst[index++].a = value.r;
-    dst[index++].rgba = value;
-    dst[index++].abgr = value;
-    // rvalue swizzles
-    dst[index++] = value.r;
-    dst[index++] = value.g;
-    dst[index++] = value.b;
-    dst[index++] = value.a;
-    dst[index++] = value.rgba;
-    dst[index++] = value.abgr;
-    static constexpr const char* kernel_source_sN = R"CLC(
-__kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) {
-    int index = 0;
-    // lvalue swizzles
-    dst[index++].s0 = value.s0;
-    dst[index++].s1 = value.s0;
-    dst[index++].s2 = value.s0;
-    dst[index++].s3 = value.s0;
-    dst[index++].s0123 = value;
-    dst[index++].s3210 = value;
-    // rvalue swizzles
-    dst[index++] = value.s0;
-    dst[index++] = value.s1;
-    dst[index++] = value.s2;
-    dst[index++] = value.s3;
-    dst[index++] = value.s0123;
-    dst[index++] = value.s3210;
-template <> struct TestInfo<8>
-    static const size_t vector_size = 8;
-    static constexpr const char* kernel_source_xyzw = R"CLC(
-__kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) {
-    int index = 0;
-    // xwzw only for first four components!
-    // lvalue swizzles
-    dst[index++].x = value.x;
-    dst[index++].y = value.x;
-    dst[index++].z = value.x;
-    dst[index++].w = value.x;
-    dst[index++].s4 = value.s0;
-    dst[index++].s5 = value.s0;
-    dst[index++].s6 = value.s0;
-    dst[index++].s7 = value.s0;
-    dst[index].xyzw = value.s0123;
-    dst[index++].s4567 = value.s4567;
-    dst[index].s7654 = value.s0123;
-    dst[index++].wzyx = value.s4567;
-    // rvalue swizzles
-    dst[index++] = value.x;
-    dst[index++] = value.y;
-    dst[index++] = value.z;
-    dst[index++] = value.w;
-    dst[index++] = value.s4;
-    dst[index++] = value.s5;
-    dst[index++] = value.s6;
-    dst[index++] = value.s7;
-    dst[index++] = (TYPE)(value.xyzw, value.s4567);
-    dst[index++] = (TYPE)(value.s7654, value.wzyx);
-    static constexpr const char* kernel_source_rgba = R"CLC(
-__kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) {
-    int index = 0;
-    // rgba only for first four components!
-    // lvalue swizzles
-    dst[index++].r = value.r;
-    dst[index++].g = value.r;
-    dst[index++].b = value.r;
-    dst[index++].a = value.r;
-    dst[index++].s4 = value.s0;
-    dst[index++].s5 = value.s0;
-    dst[index++].s6 = value.s0;
-    dst[index++].s7 = value.s0;
-    dst[index].rgba = value.s0123;
-    dst[index++].s4567 = value.s4567;
-    dst[index].s7654 = value.s0123;
-    dst[index++].abgr = value.s4567;
-    // rvalue swizzles
-    dst[index++] = value.r;
-    dst[index++] = value.g;
-    dst[index++] = value.b;
-    dst[index++] = value.a;
-    dst[index++] = value.s4;
-    dst[index++] = value.s5;
-    dst[index++] = value.s6;
-    dst[index++] = value.s7;
-    dst[index++] = (TYPE)(value.rgba, value.s4567);
-    dst[index++] = (TYPE)(value.s7654, value.abgr);
-    static constexpr const char* kernel_source_sN = R"CLC(
-__kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) {
-    int index = 0;
-    // lvalue swizzles
-    dst[index++].s0 = value.s0;
-    dst[index++].s1 = value.s0;
-    dst[index++].s2 = value.s0;
-    dst[index++].s3 = value.s0;
-    dst[index++].s4 = value.s0;
-    dst[index++].s5 = value.s0;
-    dst[index++].s6 = value.s0;
-    dst[index++].s7 = value.s0;
-    dst[index++].s01234567 = value;
-    dst[index++].s76543210 = value;
-    // rvalue swizzles
-    dst[index++] = value.s0;
-    dst[index++] = value.s1;
-    dst[index++] = value.s2;
-    dst[index++] = value.s3;
-    dst[index++] = value.s4;
-    dst[index++] = value.s5;
-    dst[index++] = value.s6;
-    dst[index++] = value.s7;
-    dst[index++] = value.s01234567;
-    dst[index++] = value.s76543210;
-template <> struct TestInfo<16>
-    static const size_t vector_size = 16;
-    static constexpr const char* kernel_source_xyzw = R"CLC(
-__kernel void test_vector_swizzle_xyzw(TYPE value, __global TYPE* dst) {
-    int index = 0;
-    // xwzw only for first four components!
-    // lvalue swizzles
-    dst[index++].x = value.x;
-    dst[index++].y = value.x;
-    dst[index++].z = value.x;
-    dst[index++].w = value.x;
-    dst[index++].s4 = value.s0;
-    dst[index++].s5 = value.s0;
-    dst[index++].s6 = value.s0;
-    dst[index++].s7 = value.s0;
-    dst[index++].s8 = value.s0;
-    dst[index++].s9 = value.s0;
-    dst[index++].sa = value.s0;
-    dst[index++].sb = value.s0;
-    dst[index++].sc = value.s0;
-    dst[index++].sd = value.s0;
-    dst[index++].se = value.s0;
-    dst[index++].sf = value.s0;
-    dst[index].xyzw = value.s0123;
-    dst[index].s4567 = value.s4567;
-    dst[index].s89ab = value.s89ab;
-    dst[index++].scdef = value.scdef;
-    dst[index].sfedc = value.s0123;
-    dst[index].sba98 = value.s4567;
-    dst[index].s7654 = value.s89ab;
-    dst[index++].wzyx = value.scdef;
-    // rvalue swizzles
-    dst[index++] = value.x;
-    dst[index++] = value.y;
-    dst[index++] = value.z;
-    dst[index++] = value.w;
-    dst[index++] = value.s4;
-    dst[index++] = value.s5;
-    dst[index++] = value.s6;
-    dst[index++] = value.s7;
-    dst[index++] = value.s8;
-    dst[index++] = value.s9;
-    dst[index++] =;
-    dst[index++] =;
-    dst[index++] =;
-    dst[index++] =;
-    dst[index++] =;
-    dst[index++] = value.sf;
-    dst[index++] = (TYPE)(value.xyzw, value.s4567, value.s89abcdef);
-    dst[index++] = (TYPE)(value.sfedcba98, value.s7654, value.wzyx);
-    static constexpr const char* kernel_source_rgba = R"CLC(
-__kernel void test_vector_swizzle_rgba(TYPE value, __global TYPE* dst) {
-    int index = 0;
-    // rgba only for first four components!
-    // lvalue swizzles
-    dst[index++].r = value.r;
-    dst[index++].g = value.r;
-    dst[index++].b = value.r;
-    dst[index++].a = value.r;
-    dst[index++].s4 = value.s0;
-    dst[index++].s5 = value.s0;
-    dst[index++].s6 = value.s0;
-    dst[index++].s7 = value.s0;
-    dst[index++].s8 = value.s0;
-    dst[index++].s9 = value.s0;
-    dst[index++].sa = value.s0;
-    dst[index++].sb = value.s0;
-    dst[index++].sc = value.s0;
-    dst[index++].sd = value.s0;
-    dst[index++].se = value.s0;
-    dst[index++].sf = value.s0;
-    dst[index].rgba = value.s0123;
-    dst[index].s4567 = value.s4567;
-    dst[index].s89ab = value.s89ab;
-    dst[index++].scdef = value.scdef;
-    dst[index].sfedc = value.s0123;
-    dst[index].sba98 = value.s4567;
-    dst[index].s7654 = value.s89ab;
-    dst[index++].abgr = value.scdef;
-    // rvalue swizzles
-    dst[index++] = value.r;
-    dst[index++] = value.g;
-    dst[index++] = value.b;
-    dst[index++] = value.a;
-    dst[index++] = value.s4;
-    dst[index++] = value.s5;
-    dst[index++] = value.s6;
-    dst[index++] = value.s7;
-    dst[index++] = value.s8;
-    dst[index++] = value.s9;
-    dst[index++] =;
-    dst[index++] =;
-    dst[index++] =;
-    dst[index++] =;
-    dst[index++] =;
-    dst[index++] = value.sf;
-    dst[index++] = (TYPE)(value.rgba, value.s4567, value.s89abcdef);
-    dst[index++] = (TYPE)(value.sfedcba98, value.s7654, value.abgr);
-    static constexpr const char* kernel_source_sN = R"CLC(
-__kernel void test_vector_swizzle_sN(TYPE value, __global TYPE* dst) {
-    int index = 0;
-    // lvalue swizzles
-    dst[index++].s0 = value.s0;
-    dst[index++].s1 = value.s0;
-    dst[index++].s2 = value.s0;
-    dst[index++].s3 = value.s0;
-    dst[index++].s4 = value.s0;
-    dst[index++].s5 = value.s0;
-    dst[index++].s6 = value.s0;
-    dst[index++].s7 = value.s0;
-    dst[index++].s8 = value.s0;
-    dst[index++].s9 = value.s0;
-    dst[index++].sa = value.s0;
-    dst[index++].sb = value.s0;
-    dst[index++].sc = value.s0;
-    dst[index++].sd = value.s0;
-    dst[index++].se = value.s0;
-    dst[index++].sf = value.s0;
-    dst[index++].s0123456789abcdef = value; // lower-case
-    dst[index++].sFEDCBA9876543210 = value; // upper-case
-    // rvalue swizzles
-    dst[index++] = value.s0;
-    dst[index++] = value.s1;
-    dst[index++] = value.s2;
-    dst[index++] = value.s3;
-    dst[index++] = value.s4;
-    dst[index++] = value.s5;
-    dst[index++] = value.s6;
-    dst[index++] = value.s7;
-    dst[index++] = value.s8;
-    dst[index++] = value.s9;
-    dst[index++] =;
-    dst[index++] =;
-    dst[index++] =;
-    dst[index++] =;
-    dst[index++] =;
-    dst[index++] = value.sf;
-    dst[index++] = value.s0123456789abcdef; // lower-case
-    dst[index++] = value.sFEDCBA9876543210; // upper-case
-template <typename T, size_t N, size_t S>
-static void makeReference(std::vector<T>& ref)
-    // N single channel lvalue tests
-    // 2 multi-value lvalue tests
-    // N single channel rvalue tests
-    // 2 multi-value rvalue tests
-    const size_t refSize = (N + 2 + N + 2) * S;
-    ref.resize(refSize);
-    std::fill(ref.begin(), ref.end(), 99);
-    size_t dstIndex = 0;
-    // single channel lvalue
-    for (size_t i = 0; i < N; i++)
-    {
-        ref[dstIndex * S + i] = 0;
-        ++dstIndex;
-    }
-    // normal lvalue
-    for (size_t c = 0; c < N; c++)
-    {
-        ref[dstIndex * S + c] = c;
-    }
-    ++dstIndex;
-    // reverse lvalue
-    for (size_t c = 0; c < N; c++)
-    {
-        ref[dstIndex * S + c] = N - c - 1;
-    }
-    ++dstIndex;
-    // single channel rvalue
-    for (size_t i = 0; i < N; i++)
-    {
-        for (size_t c = 0; c < N; c++)
-        {
-            ref[dstIndex * S + c] = i;
-        }
-        ++dstIndex;
-    }
-    // normal rvalue
-    for (size_t c = 0; c < N; c++)
-    {
-        ref[dstIndex * S + c] = c;
-    }
-    ++dstIndex;
-    // reverse rvalue
-    for (size_t c = 0; c < N; c++)
-    {
-        ref[dstIndex * S + c] = N - c - 1;
-    }
-    ++dstIndex;
-    assert(dstIndex * S == refSize);
-template <typename T>
-static int
-test_vectype_case(const std::vector<T>& value, const std::vector<T>& reference,
-                  cl_context context, cl_kernel kernel, cl_command_queue queue)
-    cl_int error = CL_SUCCESS;
-    clMemWrapper mem;
-    std::vector<T> buffer(reference.size(), 99);
-    mem = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                         buffer.size() * sizeof(T),, &error);
-    test_error(error, "Unable to create test buffer");
-    error = clSetKernelArg(kernel, 0, value.size() * sizeof(T),;
-    test_error(error, "Unable to set value kernel arg");
-    error = clSetKernelArg(kernel, 1, sizeof(mem), &mem);
-    test_error(error, "Unable to set destination buffer kernel arg");
-    size_t global_work_size[] = { 1 };
-    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
-                                   NULL, 0, NULL, NULL);
-    test_error(error, "Unable to enqueue test kernel");
-    error = clFinish(queue);
-    test_error(error, "clFinish failed after test kernel");
-    error =
-        clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, buffer.size() * sizeof(T),
-                  , 0, NULL, NULL);
-    test_error(error, "Unable to read data after test kernel");
-    if (buffer != reference)
-    {
-        log_error("Result buffer did not match reference buffer!\n");
-        return TEST_FAIL;
-    }
-    return TEST_PASS;
-template <typename T, size_t N>
-static int test_vectype(const char* type_name, cl_device_id device,
-                        cl_context context, cl_command_queue queue)
-    log_info("    testing type %s%d\n", type_name, N);
-    cl_int error = CL_SUCCESS;
-    int result = TEST_PASS;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    std::string buildOptions{ "-DTYPE=" };
-    buildOptions += type_name;
-    buildOptions += std::to_string(N);
-    buildOptions += " -DBASETYPE=";
-    buildOptions += type_name;
-    constexpr size_t S = TestInfo<N>::vector_size;
-    std::vector<T> value(S);
-    std::iota(value.begin(), value.end(), 0);
-    std::vector<T> reference;
-    makeReference<T, N, S>(reference);
-    // XYZW swizzles:
-    const char* xyzw_source = TestInfo<N>::kernel_source_xyzw;
-    error = create_single_kernel_helper(
-        context, &program, &kernel, 1, &xyzw_source, "test_vector_swizzle_xyzw",
-        buildOptions.c_str());
-    test_error(error, "Unable to create xyzw test kernel");
-    result |= test_vectype_case(value, reference, context, kernel, queue);
-    // sN swizzles:
-    const char* sN_source = TestInfo<N>::kernel_source_sN;
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        &sN_source, "test_vector_swizzle_sN",
-                                        buildOptions.c_str());
-    test_error(error, "Unable to create sN test kernel");
-    result |= test_vectype_case(value, reference, context, kernel, queue);
-    // RGBA swizzles for OpenCL 3.0 and newer:
-    const Version device_version = get_device_cl_version(device);
-    if (device_version >= Version(3, 0))
-    {
-        const char* rgba_source = TestInfo<N>::kernel_source_rgba;
-        error = create_single_kernel_helper(
-            context, &program, &kernel, 1, &rgba_source,
-            "test_vector_swizzle_rgba", buildOptions.c_str());
-        test_error(error, "Unable to create rgba test kernel");
-        result |= test_vectype_case(value, reference, context, kernel, queue);
-    }
-    return result;
-template <typename T>
-static int test_type(const char* type_name, cl_device_id device,
-                     cl_context context, cl_command_queue queue)
-    return test_vectype<T, 2>(type_name, device, context, queue)
-        | test_vectype<T, 3>(type_name, device, context, queue)
-        | test_vectype<T, 4>(type_name, device, context, queue)
-        | test_vectype<T, 8>(type_name, device, context, queue)
-        | test_vectype<T, 16>(type_name, device, context, queue);
-int test_vector_swizzle(cl_device_id device, cl_context context,
-                        cl_command_queue queue, int num_elements)
-    int hasDouble = is_extension_available(device, "cl_khr_fp64");
-    int result = TEST_PASS;
-    result |= test_type<cl_char>("char", device, context, queue);
-    result |= test_type<cl_uchar>("uchar", device, context, queue);
-    result |= test_type<cl_short>("short", device, context, queue);
-    result |= test_type<cl_ushort>("ushort", device, context, queue);
-    result |= test_type<cl_int>("int", device, context, queue);
-    result |= test_type<cl_uint>("uint", device, context, queue);
-    if (gHasLong)
-    {
-        result |= test_type<cl_long>("long", device, context, queue);
-        result |= test_type<cl_ulong>("ulong", device, context, queue);
-    }
-    result |= test_type<cl_float>("float", device, context, queue);
-    if (hasDouble)
-    {
-        result |= test_type<cl_double>("double", device, context, queue);
-    }
-    return result;
diff --git a/test_conformance/basic/test_wg_barrier.cpp b/test_conformance/basic/test_wg_barrier.cpp
index a237d80..7867512 100644
--- a/test_conformance/basic/test_wg_barrier.cpp
+++ b/test_conformance/basic/test_wg_barrier.cpp
@@ -87,9 +87,7 @@
     size_t max_threadgroup_size = 0;
     MTdata d;
-    err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &wg_barrier_kernel_code, "compute_sum",
-        nullptr);
+    err = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &wg_barrier_kernel_code, "compute_sum", "-cl-std=CL2.0" );
     test_error(err, "Failed to build kernel/program.");
     err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
@@ -110,15 +108,11 @@
     input_ptr = (int*)malloc(sizeof(int) * num_elements);
     output_ptr = (int*)malloc(sizeof(int));
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, &err);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, &err);
     test_error(err, "clCreateBuffer failed.");
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int), NULL, &err);
     test_error(err, "clCreateBuffer failed.");
-    streams[2] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_int) * max_threadgroup_size, NULL, &err);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * max_threadgroup_size, NULL, &err);
     test_error(err, "clCreateBuffer failed.");
     d = init_genrand( gRandomSeed );
diff --git a/test_conformance/buffers/array_info.cpp b/test_conformance/buffers/array_info.cpp
index f143cf3..be33c00 100644
--- a/test_conformance/buffers/array_info.cpp
+++ b/test_conformance/buffers/array_info.cpp
@@ -33,8 +33,7 @@
     size_t          retSize;
     size_t          elementSize = sizeof( cl_int );
-    memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, elementSize * w * h * d,
-                            NULL, &err);
+    memobj = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  elementSize * w*h*d, NULL, &err);
     test_error(err, "clCreateBuffer failed.");
     err = clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof( size_t ), (void *)&retSize, NULL);
diff --git a/test_conformance/buffers/main.cpp b/test_conformance/buffers/main.cpp
index 7c5502a..1a5c864 100644
--- a/test_conformance/buffers/main.cpp
+++ b/test_conformance/buffers/main.cpp
@@ -22,104 +22,104 @@
 #include "harness/testHarness.h"
 test_definition test_list[] = {
-    ADD_TEST(buffer_read_async_int),
-    ADD_TEST(buffer_read_async_uint),
-    ADD_TEST(buffer_read_async_long),
-    ADD_TEST(buffer_read_async_ulong),
-    ADD_TEST(buffer_read_async_short),
-    ADD_TEST(buffer_read_async_ushort),
-    ADD_TEST(buffer_read_async_char),
-    ADD_TEST(buffer_read_async_uchar),
-    ADD_TEST(buffer_read_async_float),
-    ADD_TEST(buffer_read_array_barrier_int),
-    ADD_TEST(buffer_read_array_barrier_uint),
-    ADD_TEST(buffer_read_array_barrier_long),
-    ADD_TEST(buffer_read_array_barrier_ulong),
-    ADD_TEST(buffer_read_array_barrier_short),
-    ADD_TEST(buffer_read_array_barrier_ushort),
-    ADD_TEST(buffer_read_array_barrier_char),
-    ADD_TEST(buffer_read_array_barrier_uchar),
-    ADD_TEST(buffer_read_array_barrier_float),
-    ADD_TEST(buffer_read_int),
-    ADD_TEST(buffer_read_uint),
-    ADD_TEST(buffer_read_long),
-    ADD_TEST(buffer_read_ulong),
-    ADD_TEST(buffer_read_short),
-    ADD_TEST(buffer_read_ushort),
-    ADD_TEST(buffer_read_float),
-    ADD_TEST(buffer_read_half),
-    ADD_TEST(buffer_read_char),
-    ADD_TEST(buffer_read_uchar),
-    ADD_TEST(buffer_read_struct),
-    ADD_TEST(buffer_read_random_size),
-    ADD_TEST(buffer_map_read_int),
-    ADD_TEST(buffer_map_read_uint),
-    ADD_TEST(buffer_map_read_long),
-    ADD_TEST(buffer_map_read_ulong),
-    ADD_TEST(buffer_map_read_short),
-    ADD_TEST(buffer_map_read_ushort),
-    ADD_TEST(buffer_map_read_char),
-    ADD_TEST(buffer_map_read_uchar),
-    ADD_TEST(buffer_map_read_float),
-    ADD_TEST(buffer_map_read_struct),
+    ADD_TEST( buffer_read_async_int ),
+    ADD_TEST( buffer_read_async_uint ),
+    ADD_TEST( buffer_read_async_long ),
+    ADD_TEST( buffer_read_async_ulong ),
+    ADD_TEST( buffer_read_async_short ),
+    ADD_TEST( buffer_read_async_ushort ),
+    ADD_TEST( buffer_read_async_char ),
+    ADD_TEST( buffer_read_async_uchar ),
+    ADD_TEST( buffer_read_async_float ),
+    ADD_TEST( buffer_read_array_barrier_int ),
+    ADD_TEST( buffer_read_array_barrier_uint ),
+    ADD_TEST( buffer_read_array_barrier_long ),
+    ADD_TEST( buffer_read_array_barrier_ulong ),
+    ADD_TEST( buffer_read_array_barrier_short ),
+    ADD_TEST( buffer_read_array_barrier_ushort ),
+    ADD_TEST( buffer_read_array_barrier_char ),
+    ADD_TEST( buffer_read_array_barrier_uchar ),
+    ADD_TEST( buffer_read_array_barrier_float ),
+    ADD_TEST( buffer_read_int ),
+    ADD_TEST( buffer_read_uint ),
+    ADD_TEST( buffer_read_long ),
+    ADD_TEST( buffer_read_ulong ),
+    ADD_TEST( buffer_read_short ),
+    ADD_TEST( buffer_read_ushort ),
+    ADD_TEST( buffer_read_float ),
+    NOT_IMPLEMENTED_TEST( buffer_read_half ),
+    ADD_TEST( buffer_read_char ),
+    ADD_TEST( buffer_read_uchar ),
+    ADD_TEST( buffer_read_struct ),
+    ADD_TEST( buffer_read_random_size ),
+    ADD_TEST( buffer_map_read_int ),
+    ADD_TEST( buffer_map_read_uint ),
+    ADD_TEST( buffer_map_read_long ),
+    ADD_TEST( buffer_map_read_ulong ),
+    ADD_TEST( buffer_map_read_short ),
+    ADD_TEST( buffer_map_read_ushort ),
+    ADD_TEST( buffer_map_read_char ),
+    ADD_TEST( buffer_map_read_uchar ),
+    ADD_TEST( buffer_map_read_float ),
+    ADD_TEST( buffer_map_read_struct ),
-    ADD_TEST(buffer_map_write_int),
-    ADD_TEST(buffer_map_write_uint),
-    ADD_TEST(buffer_map_write_long),
-    ADD_TEST(buffer_map_write_ulong),
-    ADD_TEST(buffer_map_write_short),
-    ADD_TEST(buffer_map_write_ushort),
-    ADD_TEST(buffer_map_write_char),
-    ADD_TEST(buffer_map_write_uchar),
-    ADD_TEST(buffer_map_write_float),
-    ADD_TEST(buffer_map_write_struct),
+    ADD_TEST( buffer_map_write_int ),
+    ADD_TEST( buffer_map_write_uint ),
+    ADD_TEST( buffer_map_write_long ),
+    ADD_TEST( buffer_map_write_ulong ),
+    ADD_TEST( buffer_map_write_short ),
+    ADD_TEST( buffer_map_write_ushort ),
+    ADD_TEST( buffer_map_write_char ),
+    ADD_TEST( buffer_map_write_uchar ),
+    ADD_TEST( buffer_map_write_float ),
+    ADD_TEST( buffer_map_write_struct ),
-    ADD_TEST(buffer_write_int),
-    ADD_TEST(buffer_write_uint),
-    ADD_TEST(buffer_write_short),
-    ADD_TEST(buffer_write_ushort),
-    ADD_TEST(buffer_write_char),
-    ADD_TEST(buffer_write_uchar),
-    ADD_TEST(buffer_write_float),
-    ADD_TEST(buffer_write_half),
-    ADD_TEST(buffer_write_long),
-    ADD_TEST(buffer_write_ulong),
-    ADD_TEST(buffer_write_struct),
-    ADD_TEST(buffer_write_async_int),
-    ADD_TEST(buffer_write_async_uint),
-    ADD_TEST(buffer_write_async_short),
-    ADD_TEST(buffer_write_async_ushort),
-    ADD_TEST(buffer_write_async_char),
-    ADD_TEST(buffer_write_async_uchar),
-    ADD_TEST(buffer_write_async_float),
-    ADD_TEST(buffer_write_async_long),
-    ADD_TEST(buffer_write_async_ulong),
-    ADD_TEST(buffer_copy),
-    ADD_TEST(buffer_partial_copy),
-    ADD_TEST(mem_read_write_flags),
-    ADD_TEST(mem_write_only_flags),
-    ADD_TEST(mem_read_only_flags),
-    ADD_TEST(mem_copy_host_flags),
-    ADD_TEST(mem_alloc_ref_flags),
-    ADD_TEST(array_info_size),
+    ADD_TEST( buffer_write_int ),
+    ADD_TEST( buffer_write_uint ),
+    ADD_TEST( buffer_write_short ),
+    ADD_TEST( buffer_write_ushort ),
+    ADD_TEST( buffer_write_char ),
+    ADD_TEST( buffer_write_uchar ),
+    ADD_TEST( buffer_write_float ),
+    NOT_IMPLEMENTED_TEST( buffer_write_half ),
+    ADD_TEST( buffer_write_long ),
+    ADD_TEST( buffer_write_ulong ),
+    ADD_TEST( buffer_write_struct ),
+    ADD_TEST( buffer_write_async_int ),
+    ADD_TEST( buffer_write_async_uint ),
+    ADD_TEST( buffer_write_async_short ),
+    ADD_TEST( buffer_write_async_ushort ),
+    ADD_TEST( buffer_write_async_char ),
+    ADD_TEST( buffer_write_async_uchar ),
+    ADD_TEST( buffer_write_async_float ),
+    ADD_TEST( buffer_write_async_long ),
+    ADD_TEST( buffer_write_async_ulong ),
+    ADD_TEST( buffer_copy ),
+    ADD_TEST( buffer_partial_copy ),
+    ADD_TEST( mem_read_write_flags ),
+    ADD_TEST( mem_write_only_flags ),
+    ADD_TEST( mem_read_only_flags ),
+    ADD_TEST( mem_copy_host_flags ),
+    NOT_IMPLEMENTED_TEST( mem_alloc_ref_flags ),
+    ADD_TEST( array_info_size ),
-    ADD_TEST(sub_buffers_read_write),
-    ADD_TEST(sub_buffers_read_write_dual_devices),
-    ADD_TEST(sub_buffers_overlapping),
+    ADD_TEST( sub_buffers_read_write ),
+    ADD_TEST( sub_buffers_read_write_dual_devices ),
+    ADD_TEST( sub_buffers_overlapping ),
-    ADD_TEST(buffer_fill_int),
-    ADD_TEST(buffer_fill_uint),
-    ADD_TEST(buffer_fill_short),
-    ADD_TEST(buffer_fill_ushort),
-    ADD_TEST(buffer_fill_char),
-    ADD_TEST(buffer_fill_uchar),
-    ADD_TEST(buffer_fill_long),
-    ADD_TEST(buffer_fill_ulong),
-    ADD_TEST(buffer_fill_float),
-    ADD_TEST(buffer_fill_struct),
+    ADD_TEST( buffer_fill_int ),
+    ADD_TEST( buffer_fill_uint ),
+    ADD_TEST( buffer_fill_short ),
+    ADD_TEST( buffer_fill_ushort ),
+    ADD_TEST( buffer_fill_char ),
+    ADD_TEST( buffer_fill_uchar ),
+    ADD_TEST( buffer_fill_long ),
+    ADD_TEST( buffer_fill_ulong ),
+    ADD_TEST( buffer_fill_float ),
+    ADD_TEST( buffer_fill_struct ),
-    ADD_TEST(buffer_migrate),
-    ADD_TEST(image_migrate),
+    ADD_TEST( buffer_migrate ),
+    ADD_TEST( image_migrate ),
 const int test_num = ARRAY_SIZE( test_list );
@@ -141,5 +141,5 @@
 int main( int argc, const char *argv[] )
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
diff --git a/test_conformance/buffers/test_buffer_fill.cpp b/test_conformance/buffers/test_buffer_fill.cpp
index 9c9c7d1..142b7da 100644
--- a/test_conformance/buffers/test_buffer_fill.cpp
+++ b/test_conformance/buffers/test_buffer_fill.cpp
@@ -24,6 +24,8 @@
 #include "procs.h"
 #include "harness/errorHelpers.h"
+#define USE_LOCAL_WORK_GROUP    1
 #define TEST_PRIME_CHAR        0x77
 #define TEST_PRIME_INT        ((1<<16)+1)
 #define TEST_PRIME_UINT        ((1U<<16)+1U)
@@ -562,13 +564,18 @@
                      int loops, void *inptr[5], void *hostptr[5], void *pattern[5], size_t offset_elements, size_t fill_elements,
                      const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int) )
+    cl_mem      buffers[10];
     void        *outptr[5];
-    clProgramWrapper program[5];
-    clKernelWrapper kernel[5];
+    cl_program  program[5];
+    cl_kernel   kernel[5];
+    cl_event    event[2];
     size_t      ptrSizes[5];
     size_t      global_work_size[3];
+    size_t      local_work_size[3];
     int         err;
-    int i;
+    int         i, ii;
     int         src_flag_id;
     int         total_errors = 0;
@@ -582,39 +589,23 @@
     ptrSizes[3] = ptrSizes[2] << 1;
     ptrSizes[4] = ptrSizes[3] << 1;
-    loops = (loops < 5 ? loops : 5);
-    for (i = 0; i < loops; i++)
-    {
-        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
-                                          &kernelCode[i], kernelName[i]);
-        if (err)
-        {
-            log_error(" Error creating program for %s\n", type);
-            return -1;
-        }
+    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        log_info("Testing with cl_mem_flags: %s\n", flag_set_names[src_flag_id]);
-        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-        {
-            clEventWrapper event[2];
-            clMemWrapper buffers[2];
+        loops = ( loops < 5 ? loops : 5 );
+        for ( i = 0; i < loops; i++ ){
+            ii = i << 1;
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-                buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],
-                                            ptrSizes[i] * num_elements,
-                                            hostptr[i], &err);
+                buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, hostptr[i], &err);
-                buffers[0] =
-                    clCreateBuffer(context, flag_set[src_flag_id],
-                                   ptrSizes[i] * num_elements, NULL, &err);
-            if (!buffers[0] || err)
-            {
+                buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
+            if ( !buffers[ii] || err){
                 print_error(err, "clCreateBuffer failed\n" );
                 return -1;
             // Initialize source buffer with 0, since the validation code expects 0(s) outside of the fill region.
             if (!((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))) {
-                err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0,
-                                           ptrSizes[i] * num_elements,
-                                           hostptr[i], 0, NULL, NULL);
+                err = clEnqueueWriteBuffer(queue, buffers[ii], CL_FALSE, 0, ptrSizes[i]*num_elements, hostptr[i], 0, NULL, NULL);
                 if ( err != CL_SUCCESS ){
                     print_error(err, "clEnqueueWriteBuffer failed\n" );
                     return -1;
@@ -623,33 +614,50 @@
             outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
             memset(outptr[i], 0, ptrSizes[i] * num_elements);
-            buffers[1] =
-                clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                               ptrSizes[i] * num_elements, outptr[i], &err);
-            if (!buffers[1] || err)
-            {
+            buffers[ii+1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,  ptrSizes[i] * num_elements, outptr[i], &err);
+            if ( !buffers[ii+1] || err){
                 print_error(err, "clCreateBuffer failed\n" );
+                clReleaseMemObject( buffers[ii] );
                 align_free( outptr[i] );
                 return -1;
-            err = clEnqueueFillBuffer(
-                queue, buffers[0], pattern[i], ptrSizes[i],
-                ptrSizes[i] * offset_elements, ptrSizes[i] * fill_elements, 0,
-                NULL, &(event[0]));
+            err = clEnqueueFillBuffer(queue, buffers[ii], pattern[i], ptrSizes[i],
+                                      ptrSizes[i] * offset_elements, ptrSizes[i] * fill_elements,
+                                      0, NULL, &(event[0]));
+            /* uncomment for test debugging
+             err = clEnqueueWriteBuffer(queue, buffers[ii], CL_FALSE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, &(event[0]));
+             */
             if ( err != CL_SUCCESS ){
                 print_error( err, " clEnqueueFillBuffer failed" );
+                clReleaseMemObject( buffers[ii] );
+                clReleaseMemObject( buffers[ii+1] );
                 align_free( outptr[i] );
                 return -1;
-            err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem),
-                                 (void *)&buffers[0]);
-            err |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem),
-                                  (void *)&buffers[1]);
+            err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
+            if ( err ){
+                log_error( " Error creating program for %s\n", type );
+                clReleaseMemObject( buffers[ii] );
+                clReleaseMemObject( buffers[ii+1] );
+                align_free( outptr[i] );
+                return -1;
+            }
+            err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+            test_error( err, "Unable to get work group size to use" );
+            err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
+            err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clSetKernelArg failed" );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
+                clReleaseMemObject( buffers[ii] );
+                clReleaseMemObject( buffers[ii+1] );
                 align_free( outptr[i] );
                 return -1;
@@ -657,19 +665,26 @@
             err = clWaitForEvents(  1, &(event[0]) );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clWaitForEvents() failed" );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
+                clReleaseMemObject( buffers[ii] );
+                clReleaseMemObject( buffers[ii+1] );
                 align_free( outptr[i] );
                 return -1;
+            clReleaseEvent(event[0]);
+            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
             if (err != CL_SUCCESS){
                 print_error( err, "clEnqueueNDRangeKernel failed" );
                 return -1;
-            err = clEnqueueReadBuffer(queue, buffers[1], false, 0,
-                                      ptrSizes[i] * num_elements, outptr[i], 0,
-                                      NULL, &(event[1]));
+            err = clEnqueueReadBuffer( queue, buffers[ii+1], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &(event[1]) );
             if (err != CL_SUCCESS){
                 print_error( err, "clEnqueueReadBuffer failed" );
                 return -1;
@@ -679,18 +694,21 @@
             if ( err != CL_SUCCESS ){
                 print_error( err, "clWaitForEvents() failed" );
+            clReleaseEvent(event[1]);
             if ( fn( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){
-                log_error(" %s%d test failed. (cl_mem_flags: %s)\n", type,
-                          1 << i, flag_set_names[src_flag_id]);
+                log_error( " %s%d test failed\n", type, 1<<i );
-                log_info(" %s%d test passed (cl_mem_flags: %s)\n", type, 1 << i,
-                         flag_set_names[src_flag_id]);
+                log_info( " %s%d test passed\n", type, 1<<i );
             // cleanup
+            clReleaseMemObject( buffers[ii] );
+            clReleaseMemObject( buffers[ii+1] );
+            clReleaseKernel( kernel[i] );
+            clReleaseProgram( program[i] );
             align_free( outptr[i] );
     } // src cl_mem_flag
@@ -702,11 +720,19 @@
 int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
-    TestStruct pattern;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
+    cl_mem      buffers[2];
+    void        *outptr;
+    TestStruct  *inptr;
+    TestStruct  *hostptr;
+    TestStruct  *pattern;
+    cl_program  program;
+    cl_kernel   kernel;
+    cl_event    event[2];
     size_t      ptrSize = sizeof( TestStruct );
     size_t      global_work_size[3];
+    size_t      local_work_size[3];
     int         n, err;
     size_t      j, offset_elements, fill_elements;
     int         src_flag_id;
@@ -717,63 +743,35 @@
     global_work_size[0] = (size_t)num_elements;
+    // Test with random offsets and fill sizes
+    for ( n = 0; n < 8; n++ ){
+        offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
+        fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
+        log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
-    for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-    {
-        log_info("Testing with cl_mem_flags: %s\n",
-                 flag_set_names[src_flag_id]);
+        pattern = (TestStruct *)malloc(ptrSize);
+        pattern->a = (cl_int)genrand_int32(d);
+        pattern->b = (cl_float)get_random_float( -FLT_MAX, FLT_MAX, d );
-        err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                          &struct_kernel_code,
-                                          "read_fill_struct");
-        if (err)
-        {
-            log_error(" Error creating program for struct\n");
-            free_mtdata(d);
-            return -1;
+        inptr = (TestStruct *)align_malloc(ptrSize * num_elements, min_alignment);
+        for ( j = 0; j < offset_elements; j++ ) {
+            inptr[j].a = 0;
+            inptr[j].b =0;
+        }
+        for ( j = offset_elements; j < offset_elements + fill_elements; j++ ) {
+            inptr[j].a = pattern->a;
+            inptr[j].b = pattern->b;
+        }
+        for ( j = offset_elements + fill_elements; j < (size_t)num_elements; j++ ) {
+            inptr[j].a = 0;
+            inptr[j].b = 0;
-        // Test with random offsets and fill sizes
-        for (n = 0; n < 8; n++)
-        {
-            clEventWrapper event[2];
-            clMemWrapper buffers[2];
-            void *outptr;
-            TestStruct *inptr;
-            TestStruct *hostptr;
+        hostptr = (TestStruct *)align_malloc(ptrSize * num_elements, min_alignment);
+        memset(hostptr, 0, ptrSize * num_elements);
-            offset_elements =
-                (size_t)get_random_float(0.f, (float)(num_elements - 8), d);
-            fill_elements = (size_t)get_random_float(
-                8.f, (float)(num_elements - offset_elements), d);
-            log_info("Testing random fill from offset %d for %d elements: \n",
-                     (int)offset_elements, (int)fill_elements);
-            pattern.a = (cl_int)genrand_int32(d);
-            pattern.b = (cl_float)get_random_float(-FLT_MAX, FLT_MAX, d);
-            inptr = (TestStruct *)align_malloc(ptrSize * num_elements,
-                                               min_alignment);
-            for (j = 0; j < offset_elements; j++)
-            {
-                inptr[j].a = 0;
-                inptr[j].b = 0;
-            }
-            for (j = offset_elements; j < offset_elements + fill_elements; j++)
-            {
-                inptr[j].a = pattern.a;
-                inptr[j].b = pattern.b;
-            }
-            for (j = offset_elements + fill_elements; j < (size_t)num_elements;
-                 j++)
-            {
-                inptr[j].a = 0;
-                inptr[j].b = 0;
-            }
-            hostptr = (TestStruct *)align_malloc(ptrSize * num_elements,
-                                                 min_alignment);
-            memset(hostptr, 0, ptrSize * num_elements);
+        for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+            log_info("Testing with cl_mem_flags: %s\n", flag_set_names[src_flag_id]);
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
                 buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSize * num_elements, hostptr, &err);
@@ -781,6 +779,9 @@
                 buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSize * num_elements, NULL, &err);
             if ( err ){
                 print_error(err, " clCreateBuffer failed\n" );
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
                 align_free( (void *)inptr );
                 align_free( (void *)hostptr );
@@ -790,6 +791,9 @@
                 err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0, ptrSize * num_elements, hostptr, 0, NULL, NULL);
                 if ( err != CL_SUCCESS ){
                     print_error(err, " clEnqueueWriteBuffer failed\n" );
+                    clReleaseEvent( event[0] );
+                    clReleaseEvent( event[1] );
+                    free( (void *)pattern );
                     align_free( (void *)inptr );
                     align_free( (void *)hostptr );
@@ -802,32 +806,68 @@
             if ( ! buffers[1] || err){
                 print_error(err, " clCreateBuffer failed\n" );
                 align_free( outptr );
+                clReleaseMemObject(buffers[0]);
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
                 align_free( (void *)inptr );
                 align_free( (void *)hostptr );
                 return -1;
-            err = clEnqueueFillBuffer(
-                queue, buffers[0], &pattern, ptrSize, ptrSize * offset_elements,
-                ptrSize * fill_elements, 0, NULL, &(event[0]));
+            err = clEnqueueFillBuffer(queue, buffers[0], pattern, ptrSize,
+                                      ptrSize * offset_elements, ptrSize * fill_elements,
+                                      0, NULL, &(event[0]));
             /* uncomment for test debugging
              err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0, ptrSize * num_elements, inptr, 0, NULL, &(event[0]));
             if ( err != CL_SUCCESS ){
                 print_error( err, " clEnqueueFillBuffer failed" );
                 align_free( outptr );
+                clReleaseMemObject(buffers[0]);
+                clReleaseMemObject(buffers[1]);
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
                 align_free( (void *)inptr );
                 align_free( (void *)hostptr );
                 return -1;
+            err = create_single_kernel_helper( context, &program, &kernel, 1, &struct_kernel_code, "read_fill_struct" );
+            if ( err ){
+                log_error( " Error creating program for struct\n" );
+                align_free( outptr );
+                clReleaseMemObject(buffers[0]);
+                clReleaseMemObject(buffers[1]);
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
+                align_free( (void *)inptr );
+                align_free( (void *)hostptr );
+                free_mtdata(d);
+                return -1;
+            }
+            err = get_max_common_work_group_size( context, kernel, global_work_size[0], &local_work_size[0] );
+            test_error( err, "Unable to get work group size to use" );
             err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *)&buffers[0] );
             err |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *)&buffers[1] );
             if ( err != CL_SUCCESS ){
                 print_error( err, " clSetKernelArg failed" );
+                clReleaseKernel( kernel );
+                clReleaseProgram( program );
                 align_free( outptr );
+                clReleaseMemObject(buffers[0]);
+                clReleaseMemObject(buffers[1]);
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
                 align_free( (void *)inptr );
                 align_free( (void *)hostptr );
@@ -837,17 +877,36 @@
             err = clWaitForEvents(  1, &(event[0]) );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clWaitForEvents() failed" );
+                clReleaseKernel( kernel );
+                clReleaseProgram( program );
                 align_free( outptr );
+                clReleaseMemObject(buffers[0]);
+                clReleaseMemObject(buffers[1]);
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
                 align_free( (void *)inptr );
                 align_free( (void *)hostptr );
                 return -1;
+            clReleaseEvent( event[0] );
+            err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
             err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL );
             if ( err != CL_SUCCESS ){
                 print_error( err, " clEnqueueNDRangeKernel failed" );
+                clReleaseKernel( kernel );
+                clReleaseProgram( program );
                 align_free( outptr );
+                clReleaseMemObject(buffers[0]);
+                clReleaseMemObject(buffers[1]);
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
                 align_free( (void *)inptr );
                 align_free( (void *)hostptr );
@@ -857,7 +916,14 @@
             err = clEnqueueReadBuffer( queue, buffers[1], CL_FALSE, 0, ptrSize * num_elements, outptr, 0, NULL, &(event[1]) );
             if ( err != CL_SUCCESS ){
                 print_error( err, " clEnqueueReadBuffer failed" );
+                clReleaseKernel( kernel );
+                clReleaseProgram( program );
                 align_free( outptr );
+                clReleaseMemObject(buffers[0]);
+                clReleaseMemObject(buffers[1]);
+                clReleaseEvent( event[0] );
+                clReleaseEvent( event[1] );
+                free( (void *)pattern );
                 align_free( (void *)inptr );
                 align_free( (void *)hostptr );
@@ -868,6 +934,7 @@
             if ( err != CL_SUCCESS ){
                 print_error( err, "clWaitForEvents() failed" );
+            clReleaseEvent( event[1] );
             if ( verify_fill_struct( inptr, outptr, num_elements) ) {
                 log_error( " buffer_FILL async struct test failed\n" );
@@ -877,10 +944,15 @@
                 log_info( " buffer_FILL async struct test passed\n" );
             // cleanup
+            clReleaseKernel( kernel );
+            clReleaseProgram( program );
             align_free( outptr );
-            align_free((void *)inptr);
-            align_free((void *)hostptr);
+            clReleaseMemObject( buffers[0] );
+            clReleaseMemObject( buffers[1] );
         } // src cl_mem_flag
+        free( (void *)pattern );
+        align_free( (void *)inptr );
+        align_free( (void *)hostptr );
diff --git a/test_conformance/buffers/test_buffer_map.cpp b/test_conformance/buffers/test_buffer_map.cpp
index 382c7a3..eebd30c 100644
--- a/test_conformance/buffers/test_buffer_map.cpp
+++ b/test_conformance/buffers/test_buffer_map.cpp
@@ -554,9 +554,10 @@
 static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
                                  const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
+    cl_mem      buffers[5];
     void        *outptr[5];
-    clProgramWrapper program[5];
-    clKernelWrapper kernel[5];
+    cl_program  program[5];
+    cl_kernel   kernel[5];
     size_t      threads[3], localThreads[3];
     cl_int      err;
     int         i;
@@ -579,20 +580,10 @@
     if (! gHasLong && strstr(type,"long"))
         return 0;
-    for (i = 0; i < loops; i++)
-    {
+    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
-        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
-                                          &kernelCode[i], kernelName[i]);
-        if (err)
-        {
-            log_error(" Error creating program for %s\n", type);
-            return -1;
-        }
-        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-        {
-            clMemWrapper buffer;
+        for ( i = 0; i < loops; i++ ){
             outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
             if ( ! outptr[i] ){
                 log_error( " unable to allocate %d bytes of memory\n", (int)ptrSizes[i] * num_elements );
@@ -600,24 +591,30 @@
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-                buffer =
-                    clCreateBuffer(context, flag_set[src_flag_id],
-                                   ptrSizes[i] * num_elements, outptr[i], &err);
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, outptr[i], &err);
-                buffer = clCreateBuffer(context, flag_set[src_flag_id],
-                                        ptrSizes[i] * num_elements, NULL, &err);
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
-            if (!buffer || err)
-            {
+            if ( ! buffers[i] | err){
                 print_error(err, "clCreateBuffer failed\n" );
                 align_free( outptr[i] );
                 return -1;
-            err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
+            err = create_single_kernel_helper(context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
+            if ( err ){
+                log_error( " Error creating program for %s\n", type );
+                clReleaseMemObject( buffers[i] );
+                align_free( outptr[i] );
+                return -1;
+            }
+            err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clSetKernelArg failed\n" );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
+                clReleaseMemObject( buffers[i] );
                 align_free( outptr[i] );
                 return -1;
@@ -630,34 +627,39 @@
             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueNDRangeKernel failed\n" );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
+                clReleaseMemObject( buffers[i] );
                 align_free( outptr[i] );
                 return -1;
-            mappedPtr = clEnqueueMapBuffer(queue, buffer, CL_TRUE, CL_MAP_READ,
-                                           0, ptrSizes[i] * num_elements, 0,
-                                           NULL, NULL, &err);
-            if (err != CL_SUCCESS)
-            {
+            mappedPtr = clEnqueueMapBuffer(queue, buffers[i], CL_TRUE, CL_MAP_READ, 0, ptrSizes[i]*num_elements, 0, NULL, NULL, &err);
+            if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueMapBuffer failed" );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
+                clReleaseMemObject( buffers[i] );
                 align_free( outptr[i] );
                 return -1;
             if (fn(mappedPtr, num_elements*(1<<i))){
-                log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
-                          1 << i, flag_set_names[src_flag_id]);
+                log_error(" %s%d test failed\n", type, 1<<i);
-                log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
-                         1 << i, flag_set_names[src_flag_id]);
+                log_info(" %s%d test passed\n", type, 1<<i);
-            err = clEnqueueUnmapMemObject(queue, buffer, mappedPtr, 0, NULL,
-                                          NULL);
+            err = clEnqueueUnmapMemObject(queue, buffers[i], mappedPtr, 0, NULL, NULL);
             test_error(err, "clEnqueueUnmapMemObject failed");
+            // cleanup
+            clReleaseKernel( kernel[i] );
+            clReleaseProgram( program[i] );
+            clReleaseMemObject( buffers[i] );
             // If we are using the outptr[i] as backing via USE_HOST_PTR we need to make sure we are done before freeing.
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR)) {
                 err = clFinish(queue);
diff --git a/test_conformance/buffers/test_buffer_mem.cpp b/test_conformance/buffers/test_buffer_mem.cpp
index 52eb723..a471c0f 100644
--- a/test_conformance/buffers/test_buffer_mem.cpp
+++ b/test_conformance/buffers/test_buffer_mem.cpp
@@ -27,6 +27,8 @@
 typedef unsigned char uchar;
 const char *mem_read_write_kernel_code =
 "__kernel void test_mem_read_write(__global int *dst)\n"
@@ -37,12 +39,12 @@
 const char *mem_read_kernel_code =
-    "__kernel void test_mem_read(__global int *dst, __global int *src)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = src[tid]+1;\n"
-    "}\n";
+"__kernel void test_mem_read(__global int *src, __global int *dst)\n"
+"    int  tid = get_global_id(0);\n"
+"    dst[tid] = src[tid]+1;\n"
 const char *mem_write_kernel_code =
 "__kernel void test_mem_write(__global int *dst)\n"
@@ -66,179 +68,457 @@
-int test_mem_flags(cl_context context, cl_command_queue queue, int num_elements,
-                   cl_mem_flags flags, const char **kernel_program,
-                   const char *kernel_name)
+int test_mem_read_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
-    clMemWrapper buffers[2];
+    cl_mem      buffers[1];
     cl_int      *inptr, *outptr;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
+    cl_program  program[1];
+    cl_kernel   kernel[1];
     size_t      global_work_size[3];
+    size_t      local_work_size[3];
     cl_int      err;
     int         i;
     size_t      min_alignment = get_min_alignment(context);
-    bool test_read_only = (flags & CL_MEM_READ_ONLY) != 0;
-    bool test_write_only = (flags & CL_MEM_WRITE_ONLY) != 0;
-    bool copy_host_ptr = (flags & CL_MEM_COPY_HOST_PTR) != 0;
     global_work_size[0] = (cl_uint)num_elements;
     inptr = (cl_int*)align_malloc(sizeof(cl_int)  * num_elements, min_alignment);
-    if (!inptr)
-    {
-        log_error(" unable to allocate %d bytes of memory\n",
-                  (int)sizeof(cl_int) * num_elements);
-        return -1;
-    }
     outptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment);
-    if (!outptr)
-    {
-        log_error(" unable to allocate %d bytes of memory\n",
-                  (int)sizeof(cl_int) * num_elements);
-        align_free((void *)inptr);
-        return -1;
-    }
-    for (i = 0; i < num_elements; i++) inptr[i] = i;
-    buffers[0] = clCreateBuffer(context, flags, sizeof(cl_int) * num_elements,
-                                copy_host_ptr ? inptr : NULL, &err);
-    if (err != CL_SUCCESS)
-    {
-        print_error(err, "clCreateBuffer failed");
-        align_free((void *)outptr);
-        align_free((void *)inptr);
-        return -1;
-    }
-    if (!copy_host_ptr)
-    {
-        err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
-                                   sizeof(cl_int) * num_elements, (void *)inptr,
-                                   0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            print_error(err, "clEnqueueWriteBuffer failed");
-            align_free((void *)outptr);
-            align_free((void *)inptr);
-            return -1;
-        }
-    }
-    if (test_read_only)
-    {
-        /* The read only buffer for mem_read_only_flags should be created above
-        with the correct flags as in other tests. However to make later test
-        code simpler, the additional read_write buffer required is stored as
-        the first buffer */
-        buffers[1] = buffers[0];
-        buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    sizeof(cl_int) * num_elements, NULL, &err);
-        if (err != CL_SUCCESS)
-        {
-            print_error(err, " clCreateBuffer failed \n");
-            align_free((void *)inptr);
-            align_free((void *)outptr);
-            return -1;
-        }
-    }
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      kernel_program, kernel_name);
-    if (err){
-        print_error(err, "creating kernel failed");
+    buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * num_elements, NULL, &err);
+    if (err != CL_SUCCESS) {
+        print_error( err, "clCreateBuffer failed");
         align_free( (void *)outptr );
         align_free( (void *)inptr );
         return -1;
-    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&buffers[0]);
-    if (test_read_only && (err == CL_SUCCESS))
-    {
-        err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&buffers[1]);
+    for (i=0; i<num_elements; i++)
+        inptr[i] = i;
+    err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
+    if (err != CL_SUCCESS) {
+        print_error( err, "clEnqueueWriteBuffer failed");
+        clReleaseMemObject( buffers[0] );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_write_kernel_code, "test_mem_read_write" );
+    if (err){
+        clReleaseMemObject( buffers[0] );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+    err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
+    test_error( err, "Unable to get work group size to use" );
+    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
     if ( err != CL_SUCCESS ){
         print_error( err, "clSetKernelArg failed" );
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
         align_free( (void *)outptr );
         align_free( (void *)inptr );
         return -1;
-    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, NULL,
-                                 0, NULL, NULL);
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
     if (err != CL_SUCCESS){
         log_error("clEnqueueNDRangeKernel failed\n");
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
         align_free( (void *)outptr );
         align_free( (void *)inptr );
         return -1;
-    err = clEnqueueReadBuffer(queue, buffers[0], true, 0,
-                              sizeof(cl_int) * num_elements, (void *)outptr, 0,
-                              NULL, NULL);
+    err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
     if ( err != CL_SUCCESS ){
         print_error( err, "clEnqueueReadBuffer failed" );
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
         align_free( (void *)outptr );
         align_free( (void *)inptr );
         return -1;
-    if (!test_write_only)
-    {
-        if (verify_mem(outptr, num_elements))
-        {
-            log_error("test failed\n");
-            err = -1;
-        }
-        else
-        {
-            log_info("test passed\n");
-            err = 0;
-        }
+    if (verify_mem(outptr, num_elements)){
+        log_error("buffer_MEM_READ_WRITE test failed\n");
+        err = -1;
+    }
+    else{
+        log_info("buffer_MEM_READ_WRITE test passed\n");
+        err = 0;
     // cleanup
+    clReleaseMemObject( buffers[0] );
+    clReleaseKernel( kernel[0] );
+    clReleaseProgram( program[0] );
     align_free( (void *)outptr );
     align_free( (void *)inptr );
     return err;
-} // end test_mem_flags()
+}   // end test_mem_read_write()
-int test_mem_read_write_flags(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements)
+int test_mem_write_only_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
-    return test_mem_flags(context, queue, num_elements, CL_MEM_READ_WRITE,
-                          &mem_read_write_kernel_code, "test_mem_read_write");
+    cl_mem      buffers[1];
+    int         *inptr, *outptr;
+    cl_program  program[1];
+    cl_kernel   kernel[1];
+    size_t      global_work_size[3];
+    size_t      local_work_size[3];
+    cl_int      err;
+    int         i;
+    size_t      min_alignment = get_min_alignment(context);
-int test_mem_write_only_flags(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements)
-    return test_mem_flags(context, queue, num_elements, CL_MEM_WRITE_ONLY,
-                          &mem_write_kernel_code, "test_mem_write");
+    global_work_size[0] = (cl_uint)num_elements;
+    inptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
+    if ( ! inptr ){
+        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
+        return -1;
+    }
+    outptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
+    if ( ! outptr ){
+        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
+        align_free( (void *)inptr );
+        return -1;
+    }
+    buffers[0] = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int) * num_elements, NULL, &err);
+    if (err != CL_SUCCESS)
+    {
+        print_error(err, "clCreateBuffer failed\n");
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+    for (i=0; i<num_elements; i++)
+        inptr[i] = i;
+    err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
+    if (err != CL_SUCCESS){
+        print_error( err, "clEnqueueWriteBuffer failed" );
+        clReleaseMemObject( buffers[0] );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_write_kernel_code, "test_mem_write" );
+    if (err){
+        clReleaseMemObject( buffers[0] );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+    err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
+    test_error( err, "Unable to get work group size to use" );
+    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
+    if ( err != CL_SUCCESS ){
+        print_error( err, "clSetKernelArg failed");
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+    if ( err != CL_SUCCESS ){
+        print_error( err, "clEnqueueNDRangeKernel failed" );
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+    err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
+    if ( err != CL_SUCCESS ){
+        print_error( err, "Error reading array" );
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+    // cleanup
+    clReleaseMemObject( buffers[0] );
+    clReleaseKernel( kernel[0] );
+    clReleaseProgram( program[0] );
+    align_free( (void *)outptr );
+    align_free( (void *)inptr );
+    return err;
+}   // end test_mem_write()
 int test_mem_read_only_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
-    return test_mem_flags(context, queue, num_elements, CL_MEM_READ_ONLY,
-                          &mem_read_kernel_code, "test_mem_read");
+    cl_mem      buffers[2];
+    int         *inptr, *outptr;
+    cl_program  program[1];
+    cl_kernel   kernel[1];
+    size_t      global_work_size[3];
+    size_t      local_work_size[3];
+    cl_int      err;
+    int         i;
+    size_t      min_alignment = get_min_alignment(context);
+    global_work_size[0] = (cl_uint)num_elements;
+    inptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
+    if ( ! inptr ){
+        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
+        return -1;
+    }
+    outptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
+    if ( ! outptr ){
+        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
+        align_free( (void *)inptr );
+        return -1;
+    }
+    buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * num_elements, NULL, &err);
+    if ( err != CL_SUCCESS ){
+        print_error(err, " clCreateBuffer failed to create READ_ONLY array\n" );
+        align_free( (void *)outptr );
+        align_free( (void *)inptr );
+        return -1;
+    }
+    for (i=0; i<num_elements; i++)
+        inptr[i] = i;
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, &err);
+    if ( err != CL_SUCCESS ){
+        print_error(err, " clCreateBuffer failed to create MEM_ALLOC_GLOBAL_POOL array\n" );
+        clReleaseMemObject( buffers[0]) ;
+        align_free( (void *)inptr );
+        align_free( (void *)outptr );
+        return -1;
+    }
+    err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
+    if ( err != CL_SUCCESS ){
+        print_error( err, "clEnqueueWriteBuffer() failed");
+        clReleaseMemObject( buffers[1]) ;
+        clReleaseMemObject( buffers[0]) ;
+        align_free( (void *)inptr );
+        align_free( (void *)outptr );
+        return -1;
+    }
+    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_kernel_code, "test_mem_read" );
+    if ( err ){
+        clReleaseMemObject( buffers[1]) ;
+        clReleaseMemObject( buffers[0]) ;
+        align_free( (void *)inptr );
+        align_free( (void *)outptr );
+        return -1;
+    }
+    err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
+    test_error( err, "Unable to get work group size to use" );
+    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
+    err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&buffers[1] );
+    if ( err != CL_SUCCESS ){
+        print_error( err, "clSetKernelArgs failed" );
+        clReleaseMemObject( buffers[1]) ;
+        clReleaseMemObject( buffers[0]) ;
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)inptr );
+        align_free( (void *)outptr );
+        return -1;
+    }
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+    if (err != CL_SUCCESS){
+        print_error( err, "clEnqueueNDRangeKernel failed" );
+        clReleaseMemObject( buffers[1]) ;
+        clReleaseMemObject( buffers[0]) ;
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)inptr );
+        align_free( (void *)outptr );
+        return -1;
+    }
+    err = clEnqueueReadBuffer( queue, buffers[1], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
+    if ( err != CL_SUCCESS ){
+        print_error( err, "clEnqueueReadBuffer failed" );
+        clReleaseMemObject( buffers[1]) ;
+        clReleaseMemObject( buffers[0]) ;
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)inptr );
+        align_free( (void *)outptr );
+        return -1;
+    }
+    if (verify_mem(outptr, num_elements)){
+        log_error( " CL_MEM_READ_ONLY test failed\n" );
+        err = -1;
+    }
+    else{
+        log_info( " CL_MEM_READ_ONLY test passed\n" );
+        err = 0;
+    }
+    // cleanup
+    clReleaseMemObject( buffers[1]) ;
+    clReleaseMemObject( buffers[0]) ;
+    clReleaseKernel( kernel[0] );
+    clReleaseProgram( program[0] );
+    align_free( (void *)inptr );
+    align_free( (void *)outptr );
+    return err;
+}   // end test_mem_read()
 int test_mem_copy_host_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
-    return test_mem_flags(context, queue, num_elements,
-                          CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE,
-                          &mem_read_write_kernel_code, "test_mem_read_write");
+    cl_mem      buffers[1];
+    int         *ptr;
+    cl_program  program[1];
+    cl_kernel   kernel[1];
+    size_t      global_work_size[3];
+    size_t      local_work_size[3];
+    cl_int      err;
+    int         i;
-int test_mem_alloc_ref_flags(cl_device_id deviceID, cl_context context,
-                             cl_command_queue queue, int num_elements)
-    return test_mem_flags(context, queue, num_elements,
-                          CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
-                          &mem_read_write_kernel_code, "test_mem_read_write");
+    size_t min_alignment = get_min_alignment(context);
+    global_work_size[0] = (cl_uint)num_elements;
+    ptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
+    if ( ! ptr ){
+        log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
+        return -1;
+    }
+    for (i=0; i<num_elements; i++)
+        ptr[i] = i;
+    buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * num_elements, (void *)ptr, &err);
+    if (err != CL_SUCCESS){
+        print_error(err, "clCreateBuffer failed for CL_MEM_COPY_HOST_PTR\n");
+        align_free( (void *)ptr );
+        return -1;
+    }
+    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_write_kernel_code, "test_mem_read_write" );
+    if (err){
+        clReleaseMemObject( buffers[0] );
+        align_free( (void *)ptr );
+        return -1;
+    }
+    err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
+    test_error( err, "Unable to get work group size to use" );
+    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
+    if (err != CL_SUCCESS){
+        log_error("clSetKernelArgs failed\n");
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)ptr );
+        return -1;
+    }
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
+    if (err != CL_SUCCESS){
+        log_error("clEnqueueNDRangeKernel failed\n");
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)ptr );
+        return -1;
+    }
+    err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)ptr, 0, NULL, NULL );
+    if (err != CL_SUCCESS){
+        log_error("CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_CONSTANT_POOL failed.\n");
+        clReleaseMemObject( buffers[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseProgram( program[0] );
+        align_free( (void *)ptr );
+        return -1;
+    }
+    if ( verify_mem( ptr, num_elements ) ){
+        log_error("CL_MEM_COPY_HOST_PTR test failed\n");
+        err = -1;
+    }
+    else{
+        log_info("CL_MEM_COPY_HOST_PTR test passed\n");
+        err = 0;
+    }
+    // cleanup
+    clReleaseMemObject( buffers[0] );
+    clReleaseKernel( kernel[0] );
+    clReleaseProgram( program[0] );
+    align_free( (void *)ptr );
+    return err;
+}   // end test_mem_copy_host_flags()
diff --git a/test_conformance/buffers/test_buffer_migrate.cpp b/test_conformance/buffers/test_buffer_migrate.cpp
index f309836..a5b6f26 100644
--- a/test_conformance/buffers/test_buffer_migrate.cpp
+++ b/test_conformance/buffers/test_buffer_migrate.cpp
@@ -295,9 +295,9 @@
             if ((err = clEnqueueNDRangeKernel(queues[i], kernel, 1, NULL, wgs, NULL, 0, NULL, NULL)) != CL_SUCCESS) {
-                print_error(err, "Failed enqueuing the NDRange kernel.");
-                failed = 1;
-                goto cleanup;
+              print_error(err, "Failed enqueueing the NDRange kernel.");
+              failed = 1;
+              goto cleanup;
           // Verify the results as long as neither input is an undefined migration
diff --git a/test_conformance/buffers/test_buffer_read.cpp b/test_conformance/buffers/test_buffer_read.cpp
index 39cf329..de5bdf9 100644
--- a/test_conformance/buffers/test_buffer_read.cpp
+++ b/test_conformance/buffers/test_buffer_read.cpp
@@ -21,7 +21,6 @@
 #include <time.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <CL/cl_half.h>
 #include "procs.h"
@@ -326,7 +325,6 @@
 static const char *buffer_read_half_kernel_code[] = {
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
     "__kernel void test_buffer_read_half(__global half *dst)\n"
     "    int  tid = get_global_id(0);\n"
@@ -334,7 +332,6 @@
     "    dst[tid] = (half)119;\n"
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
     "__kernel void test_buffer_read_half2(__global half2 *dst)\n"
     "    int  tid = get_global_id(0);\n"
@@ -342,7 +339,6 @@
     "    dst[tid] = (half)119;\n"
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
     "__kernel void test_buffer_read_half4(__global half4 *dst)\n"
     "    int  tid = get_global_id(0);\n"
@@ -350,7 +346,6 @@
     "    dst[tid] = (half)119;\n"
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
     "__kernel void test_buffer_read_half8(__global half8 *dst)\n"
     "    int  tid = get_global_id(0);\n"
@@ -358,14 +353,12 @@
     "    dst[tid] = (half)119;\n"
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
     "__kernel void test_buffer_read_half16(__global half16 *dst)\n"
     "    int  tid = get_global_id(0);\n"
     "    dst[tid] = (half)119;\n"
-    "}\n"
+    "}\n" };
 static const char *half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" };
@@ -564,11 +557,11 @@
 static int verify_read_half( void *ptr, int n )
     int     i;
-    cl_half *outptr = (cl_half *)ptr;
+    float   *outptr = (float *)ptr; // FIXME: should this be cl_half_float?
-    for (i = 0; i < n; i++)
-    {
-        if (cl_half_to_float(outptr[i]) != TEST_PRIME_HALF) return -1;
+    for ( i = 0; i < n / 2; i++ ){
+        if ( outptr[i] != TEST_PRIME_HALF )
+            return -1;
     return 0;
@@ -621,11 +614,15 @@
 int test_buffer_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
                       const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
+    cl_mem      buffers[5];
     void        *outptr[5];
     void        *inptr[5];
-    clProgramWrapper program[5];
-    clKernelWrapper kernel[5];
+    cl_program  program[5];
+    cl_kernel   kernel[5];
     size_t      global_work_size[3];
+    size_t      local_work_size[3];
     cl_int      err;
     int         i;
     size_t      ptrSizes[5];
@@ -649,21 +646,10 @@
         return CL_SUCCESS;
-    for (i = 0; i < loops; i++)
-    {
+    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
-        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
-                                          &kernelCode[i], kernelName[i]);
-        if (err)
-        {
-            log_error("Creating program for %s\n", type);
-            print_error(err, " Error creating program ");
-            return -1;
-        }
-        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-        {
-            clMemWrapper buffer;
+        for ( i = 0; i < loops; i++ ){
             outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
             if ( ! outptr[i] ){
                 log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) );
@@ -677,63 +663,80 @@
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-                buffer =
-                    clCreateBuffer(context, flag_set[src_flag_id],
-                                   ptrSizes[i] * num_elements, inptr[i], &err);
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
-                buffer = clCreateBuffer(context, flag_set[src_flag_id],
-                                        ptrSizes[i] * num_elements, NULL, &err);
-            if (err != CL_SUCCESS)
-            {
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
+            if ( err != CL_SUCCESS ){
                 print_error(err, " clCreateBuffer failed\n" );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
-            err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
+            err = create_single_kernel_helper(  context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
+            if ( err ){
+                log_error("Creating program for %s\n", type);
+                print_error(err,  " Error creating program " );
+                clReleaseMemObject(buffers[i]);
+                align_free( outptr[i] );
+                align_free( inptr[i] );
+                return -1;
+            }
+            err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clSetKernelArg failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
-            err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL,
-                                         global_work_size, NULL, 0, NULL, NULL);
+            err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+            test_error( err, "Unable to get work group size to use" );
+            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
+            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueNDRangeKernel failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
-            err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0,
-                                      ptrSizes[i] * num_elements, outptr[i], 0,
-                                      NULL, NULL);
+            err = clEnqueueReadBuffer( queue, buffers[i], CL_TRUE, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueReadBuffer failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
             if (fn(outptr[i], num_elements*(1<<i))){
-                log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
-                          1 << i, flag_set_names[src_flag_id]);
+                log_error( " %s%d test failed\n", type, 1<<i );
-                log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
-                         1 << i, flag_set_names[src_flag_id]);
+                log_info( " %s%d test passed\n", type, 1<<i );
-            err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0,
-                                      ptrSizes[i] * num_elements, inptr[i], 0,
-                                      NULL, NULL);
-            if (err != CL_SUCCESS)
-            {
+            err = clEnqueueReadBuffer( queue, buffers[i], CL_TRUE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, NULL );
+            if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueReadBuffer failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
@@ -749,6 +752,9 @@
             // cleanup
+            clReleaseMemObject( buffers[i] );
+            clReleaseKernel( kernel[i] );
+            clReleaseProgram( program[i] );
             align_free( outptr[i] );
             align_free( inptr[i] );
@@ -761,12 +767,16 @@
 int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
                             const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
-    clProgramWrapper program[5];
-    clKernelWrapper kernel[5];
-    clEventWrapper event;
+    cl_mem      buffers[5];
+    cl_program  program[5];
+    cl_kernel   kernel[5];
+    cl_event    event;
     void        *outptr[5];
     void        *inptr[5];
     size_t      global_work_size[3];
+    size_t      local_work_size[3];
     cl_int      err;
     int         i;
     size_t      lastIndex;
@@ -791,20 +801,10 @@
         return CL_SUCCESS;
-    for (i = 0; i < loops; i++)
-    {
+    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
-        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
-                                          &kernelCode[i], kernelName[i]);
-        if (err)
-        {
-            log_error(" Error creating program for %s\n", type);
-            return -1;
-        }
-        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-        {
-            clMemWrapper buffer;
+        for ( i = 0; i < loops; i++ ){
             outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
             if ( ! outptr[i] ){
                 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
@@ -820,12 +820,9 @@
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-                buffer =
-                    clCreateBuffer(context, flag_set[src_flag_id],
-                                   ptrSizes[i] * num_elements, inptr[i], &err);
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
-                buffer = clCreateBuffer(context, flag_set[src_flag_id],
-                                        ptrSizes[i] * num_elements, NULL, &err);
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
             if ( err != CL_SUCCESS ){
                 print_error(err, " clCreateBuffer failed\n" );
                 align_free( outptr[i] );
@@ -833,26 +830,46 @@
                 return -1;
-            err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
-            if ( err != CL_SUCCESS ){
-                print_error( err, "clSetKernelArg failed" );
+            err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i]);
+            if ( err ){
+                log_error( " Error creating program for %s\n", type );
+                clReleaseMemObject( buffers[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
+            err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
+            if ( err != CL_SUCCESS ){
+                print_error( err, "clSetKernelArg failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
+                align_free( outptr[i] );
+                align_free( inptr[i] );
+                return -1;
+            }
+            err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+            test_error( err, "Unable to get work group size to use" );
+            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueNDRangeKernel failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
             lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
-            err = clEnqueueReadBuffer(queue, buffer, false, 0,
-                                      ptrSizes[i] * num_elements, outptr[i], 0,
-                                      NULL, &event);
+            err = clEnqueueReadBuffer( queue, buffers[i], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &event );
             if ( ((uchar *)outptr[i])[lastIndex] ){
                 log_error( "    clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
@@ -861,6 +878,9 @@
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueReadBuffer failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
@@ -868,22 +888,27 @@
             err = clWaitForEvents(1, &event );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clWaitForEvents() failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
             if ( fn(outptr[i], num_elements*(1<<i)) ){
-                log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
-                          1 << i, flag_set_names[src_flag_id]);
+                log_error( " %s%d test failed\n", type, 1<<i );
-                log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
-                         1 << i, flag_set_names[src_flag_id]);
+                log_info( " %s%d test passed\n", type, 1<<i );
             // cleanup
+            clReleaseEvent( event );
+            clReleaseMemObject( buffers[i] );
+            clReleaseKernel( kernel[i] );
+            clReleaseProgram( program[i] );
             align_free( outptr[i] );
             align_free( inptr[i] );
@@ -898,11 +923,15 @@
 int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
                                     const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
-    clProgramWrapper program[5];
-    clKernelWrapper kernel[5];
-    clEventWrapper event;
+    cl_mem      buffers[5];
+    cl_program  program[5];
+    cl_kernel   kernel[5];
+    cl_event    event;
     void        *outptr[5], *inptr[5];
     size_t      global_work_size[3];
+    size_t      local_work_size[3];
     cl_int      err;
     int         i;
     size_t      lastIndex;
@@ -927,20 +956,10 @@
         return CL_SUCCESS;
-    for (i = 0; i < loops; i++)
-    {
+    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
-        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
-                                          &kernelCode[i], kernelName[i]);
-        if (err)
-        {
-            log_error(" Error creating program for %s\n", type);
-            return -1;
-        }
-        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-        {
-            clMemWrapper buffer;
+        for ( i = 0; i < loops; i++ ){
             outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
             if ( ! outptr[i] ){
                 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
@@ -955,12 +974,9 @@
             memset( inptr[i], 0, ptrSizes[i] * num_elements );  // initialize to zero to tell difference
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-                buffer =
-                    clCreateBuffer(context, flag_set[src_flag_id],
-                                   ptrSizes[i] * num_elements, inptr[i], &err);
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
-                buffer = clCreateBuffer(context, flag_set[src_flag_id],
-                                        ptrSizes[i] * num_elements, NULL, &err);
+                buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
             if ( err != CL_SUCCESS ){
                 print_error(err, " clCreateBuffer failed\n" );
                 align_free( outptr[i] );
@@ -968,26 +984,46 @@
                 return -1;
-            err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
-            if ( err != CL_SUCCESS ){
-                print_error( err, "clSetKernelArgs failed" );
+            err = create_single_kernel_helper(  context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
+            if ( err ){
+                log_error( " Error creating program for %s\n", type );
+                clReleaseMemObject( buffers[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
+            err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
+            if ( err != CL_SUCCESS ){
+                print_error( err, "clSetKernelArgs failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
+                align_free( outptr[i] );
+                align_free( inptr[i] );
+                return -1;
+            }
+            err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+            test_error( err, "Unable to get work group size to use" );
+            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueNDRangeKernel failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
             lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
-            err = clEnqueueReadBuffer(queue, buffer, false, 0,
-                                      ptrSizes[i] * num_elements,
-                                      (void *)(outptr[i]), 0, NULL, &event);
+            err = clEnqueueReadBuffer( queue, buffers[i], false, 0, ptrSizes[i]*num_elements, (void *)(outptr[i]), 0, NULL, &event );
             if ( ((uchar *)outptr[i])[lastIndex] ){
                 log_error( "    clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
@@ -996,6 +1032,9 @@
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueReadBuffer failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
@@ -1003,6 +1042,9 @@
             err = clEnqueueBarrierWithWaitList(queue, 0, NULL, NULL);
             if ( err != CL_SUCCESS ){
                 print_error( err, "clEnqueueBarrierWithWaitList() failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 return -1;
@@ -1010,22 +1052,27 @@
             err = clWaitForEvents(1, &event);
             if ( err != CL_SUCCESS ){
                 print_error( err, "clWaitForEvents() failed" );
+                clReleaseMemObject( buffers[i] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( inptr[i] );
                 return -1;
             if ( fn(outptr[i], num_elements*(1<<i)) ){
-                log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
-                          1 << i, flag_set_names[src_flag_id]);
+                log_error(" %s%d test failed\n", type, 1<<i);
-                log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
-                         1 << i, flag_set_names[src_flag_id]);
+                log_info(" %s%d test passed\n", type, 1<<i);
             // cleanup
+            clReleaseEvent( event );
+            clReleaseMemObject( buffers[i] );
+            clReleaseKernel( kernel[i] );
+            clReleaseProgram( program[i] );
             align_free( outptr[i] );
             align_free( inptr[i] );
@@ -1052,10 +1099,8 @@
 DECLARE_READ_TEST(char, cl_char)
 DECLARE_READ_TEST(uchar, cl_uchar)
-int test_buffer_read_half(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, int num_elements)
+int test_buffer_half_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
     return test_buffer_read( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5,
                              buffer_read_half_kernel_code, half_kernel_name, verify_read_half );
@@ -1096,6 +1141,76 @@
 DECLARE_BARRIER_TEST(uchar, cl_uchar)
 DECLARE_BARRIER_TEST(float, cl_float)
+ int test_buffer_half_read(cl_device_group device, cl_device id, cl_context context, int num_elements)
+ {
+ cl_mem        buffers[1];
+ float        *outptr;
+ cl_program program[1];
+ cl_kernel    kernel[1];
+ void        *values[1];
+ size_t        sizes[1] = { sizeof(cl_buffer) };
+ uint        threads[1];
+ int        err;
+ int        i;
+ size_t        ptrSize;    // sizeof(half)
+ ptrSize = sizeof(cl_float)/2;
+ outptr = (float *)malloc(ptrSize * num_elements);
+ buffers[0] = clCreateBuffer(device, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSize * num_elements, NULL);
+ if( !buffers[0] ){
+ log_error("clCreateBuffer failed\n");
+ return -1;
+ }
+ err = create_program_and_kernel(device, buffer_read_half_kernel_code, "test_buffer_read_half", &program[0], &kernel[0]);
+ if( err ){
+ log_error( " Error creating program for half\n" );
+ clReleaseMemObject(buffers[0]);
+ free( (void *)outptr );
+ return -1;
+ }
+ values[0] = buffers[0];
+ err = clSetKernelArgs(context, kernel[0], 1, NULL, &(values[i]), sizes);
+ if( err != CL_SUCCESS ){
+ log_error("clSetKernelArgs failed\n");
+ return -1;
+ }
+ global_work_size[0] = (cl_uint)num_elements;
+ err = clEnqueueNDRangeKernel(queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL );
+ if( err != CL_SUCCESS ){
+ log_error("clEnqueueNDRangeKernel failed\n");
+ return -1;
+ }
+ err = clEnqueueReadBuffer( queue, buffers[0], true, 0, ptrSize*num_elements, (void *)outptr, 0, NULL, NULL );
+ if( err != CL_SUCCESS ){
+ log_error("clEnqueueReadBuffer failed: %d\n", err);
+ return -1;
+ }
+ if( verify_read_half( outptr, num_elements >> 1 ) ){
+ log_error( "buffer_READ half test failed\n" );
+ err = -1;
+ }
+ else{
+ log_info( "buffer_READ half test passed\n" );
+ err = 0;
+ }
+ // cleanup
+ clReleaseMemObject( buffers[0] );
+ clReleaseKernel( kernel[0] );
+ clReleaseProgram( program[0] );
+ free( (void *)outptr );
+ return err;
+ }    // end test_buffer_half_read()
+ */
 int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
     cl_mem      buffers[1];
@@ -1103,6 +1218,9 @@
     cl_program  program[1];
     cl_kernel   kernel[1];
     size_t      global_work_size[3];
+    size_t      local_work_size[3];
     cl_int      err;
     size_t      objSize = sizeof(TestStruct);
@@ -1115,8 +1233,7 @@
         log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) );
         return -1;
-    buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                objSize * num_elements, NULL, &err);
+    buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  objSize * num_elements, NULL , &err);
     if ( err != CL_SUCCESS ){
         print_error( err, " clCreateBuffer failed\n" );
         align_free( output_ptr );
@@ -1140,7 +1257,14 @@
         return -1;
+    err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
+    test_error( err, "Unable to get work group size to use" );
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
     err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
     if ( err != CL_SUCCESS ){
         print_error( err, "clEnqueueNDRangeKernel failed" );
         clReleaseMemObject( buffers[0] );
@@ -1186,6 +1310,9 @@
     cl_program  program[3];
     cl_kernel   kernel[3];
     size_t      global_work_size[3];
+    size_t      local_work_size[3];
     cl_int      err;
     int         i, j;
     size_t      ptrSizes[3];    // sizeof(int), sizeof(int2), sizeof(int4)
@@ -1207,8 +1334,7 @@
             return -1;
-        buffers[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    ptrSizes[i] * num_elements, NULL, &err);
+        buffers[i] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSizes[i] * num_elements, NULL, &err);
         if ( err != CL_SUCCESS ){
             print_error(err, " clCreateBuffer failed\n" );
             for ( j = 0; j < i; j++ ){
@@ -1267,7 +1393,14 @@
             return -1;
+        err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+        test_error( err, "Unable to get work group size to use" );
+        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
         if ( err != CL_SUCCESS ){
             print_error( err, "clEnqueueNDRangeKernel failed" );
             clReleaseMemObject( buffers[i] );
diff --git a/test_conformance/buffers/test_buffer_write.cpp b/test_conformance/buffers/test_buffer_write.cpp
index e57e1c1..0f67280 100644
--- a/test_conformance/buffers/test_buffer_write.cpp
+++ b/test_conformance/buffers/test_buffer_write.cpp
@@ -24,6 +24,7 @@
 #include "procs.h"
 #include "harness/errorHelpers.h"
+#define USE_LOCAL_WORK_GROUP    1
 #ifndef uchar
 typedef unsigned char uchar;
@@ -314,51 +315,40 @@
 const char *buffer_write_half_kernel_code[] = {
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
-    "__kernel void test_buffer_write_half(__global half *src, __global half "
-    "*dst)\n"
+    "__kernel void test_buffer_write_half(__global half *src, __global float *dst)\n"
     "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = src[tid];\n"
+    "    dst[tid] = vload_half( tid * 2, src );\n"
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
-    "__kernel void test_buffer_write_half2(__global half2 *src, __global half2 "
-    "*dst)\n"
+    "__kernel void test_buffer_write_half2(__global half2 *src, __global float2 *dst)\n"
     "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = src[tid];\n"
+    "    dst[tid] = vload_half2( tid * 2, src );\n"
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
-    "__kernel void test_buffer_write_half4(__global half4 *src, __global half4 "
-    "*dst)\n"
+    "__kernel void test_buffer_write_half4(__global half4 *src, __global float4 *dst)\n"
     "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = src[tid];\n"
+    "    dst[tid] = vload_half4( tid * 2, src );\n"
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
-    "__kernel void test_buffer_write_half8(__global half8 *src, __global half8 "
-    "*dst)\n"
+    "__kernel void test_buffer_write_half8(__global half8 *src, __global float8 *dst)\n"
     "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = src[tid];\n"
+    "    dst[tid] = vload_half8( tid * 2, src );\n"
-    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
-    "__kernel void test_buffer_write_half16(__global half16 *src, __global "
-    "half16 *dst)\n"
+    "__kernel void test_buffer_write_half16(__global half16 *src, __global float16 *dst)\n"
     "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = src[tid];\n"
-    "}\n"
+    "    dst[tid] = vload_half16( tid * 2, src );\n"
+    "}\n" };
 static const char *half_kernel_name[] = { "test_buffer_write_half", "test_buffer_write_half2", "test_buffer_write_half4", "test_buffer_write_half8", "test_buffer_write_half16" };
@@ -564,8 +554,8 @@
 static int verify_write_half( void *ptr1, void *ptr2, int n )
     int     i;
-    cl_half *inptr = (cl_half *)ptr1;
-    cl_half *outptr = (cl_half *)ptr2;
+    cl_ushort   *inptr = (cl_ushort *)ptr1;
+    cl_ushort   *outptr = (cl_ushort *)ptr2;
     for ( i = 0; i < n; i++ ){
         if ( outptr[i] != inptr[i] )
@@ -624,13 +614,17 @@
 int test_buffer_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
                        void *inptr[5], const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int), MTdata d )
+    cl_mem      buffers[10];
     void        *outptr[5];
-    clProgramWrapper program[5];
-    clKernelWrapper kernel[5];
+    cl_program  program[5];
+    cl_kernel   kernel[5];
     size_t      ptrSizes[5];
     size_t      global_work_size[3];
+    size_t      local_work_size[3];
     cl_int      err;
-    int i;
+    int         i, ii;
     int         src_flag_id, dst_flag_id;
     int         total_errors = 0;
@@ -644,34 +638,19 @@
     ptrSizes[3] = ptrSizes[2] << 1;
     ptrSizes[4] = ptrSizes[3] << 1;
-    loops = (loops < 5 ? loops : 5);
-    for (i = 0; i < loops; i++)
-    {
-        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
-                                          &kernelCode[i], kernelName[i]);
-        if (err)
-        {
-            log_error(" Error creating program for %s\n", type);
-            return -1;
-        }
+    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
+            log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
-        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-        {
-            for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
-            {
-                clMemWrapper buffers[2];
+            loops = ( loops < 5 ? loops : 5 );
+            for ( i = 0; i < loops; i++ ){
+                ii = i << 1;
                 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-                    buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],
-                                                ptrSizes[i] * num_elements,
-                                                inptr[i], &err);
+                    buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
-                    buffers[0] =
-                        clCreateBuffer(context, flag_set[src_flag_id],
-                                       ptrSizes[i] * num_elements, NULL, &err);
+                    buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
-                if (!buffers[0] || err)
-                {
+                if ( ! buffers[ii] || err){
                     align_free( outptr[i] );
                     print_error(err, " clCreateBuffer failed\n" );
                     return -1;
@@ -679,26 +658,19 @@
                 if ( ! strcmp( type, "half" ) ){
                     outptr[i] = align_malloc( ptrSizes[i] * (num_elements * 2 ), min_alignment);
                     if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
-                        buffers[1] = clCreateBuffer(
-                            context, flag_set[dst_flag_id],
-                            ptrSizes[i] * 2 * num_elements, outptr[i], &err);
+                        buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * 2 * num_elements, outptr[i], &err);
-                        buffers[1] = clCreateBuffer(
-                            context, flag_set[dst_flag_id],
-                            ptrSizes[i] * 2 * num_elements, NULL, &err);
+                        buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * 2 * num_elements, NULL, &err);
                     outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
                     if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
-                        buffers[1] = clCreateBuffer(
-                            context, flag_set[dst_flag_id],
-                            ptrSizes[i] * num_elements, outptr[i], &err);
+                        buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * num_elements, outptr[i], &err);
-                        buffers[1] = clCreateBuffer(
-                            context, flag_set[dst_flag_id],
-                            ptrSizes[i] * num_elements, NULL, &err);
+                        buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
                 if ( err ){
+                    clReleaseMemObject(buffers[ii]);
                     align_free( outptr[i] );
                     print_error(err, " clCreateBuffer failed\n" );
                     return -1;
@@ -706,77 +678,106 @@
                 if (gTestMap) {
                     void *dataPtr;
-                    dataPtr = clEnqueueMapBuffer(
-                        queue, buffers[0], CL_TRUE, CL_MAP_WRITE, 0,
-                        ptrSizes[i] * num_elements, 0, NULL, NULL, &err);
+                    dataPtr = clEnqueueMapBuffer(queue, buffers[ii], CL_TRUE, CL_MAP_WRITE, 0, ptrSizes[i]*num_elements, 0, NULL, NULL, &err);
                     if (err) {
                         print_error(err, "clEnqueueMapBuffer failed");
+                        clReleaseMemObject(buffers[ii]);
+                        clReleaseMemObject(buffers[ii+1]);
                         align_free( outptr[i] );
                         return -1;
                     memcpy(dataPtr, inptr[i], ptrSizes[i]*num_elements);
-                    err = clEnqueueUnmapMemObject(queue, buffers[0], dataPtr, 0,
-                                                  NULL, NULL);
+                    err = clEnqueueUnmapMemObject(queue, buffers[ii], dataPtr, 0, NULL, NULL);
                     if (err) {
                         print_error(err, "clEnqueueUnmapMemObject failed");
+                        clReleaseMemObject(buffers[ii]);
+                        clReleaseMemObject(buffers[ii+1]);
                         align_free( outptr[i] );
                         return -1;
                 else if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
-                    err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
-                                               ptrSizes[i] * num_elements,
-                                               inptr[i], 0, NULL, NULL);
+                    err = clEnqueueWriteBuffer(queue, buffers[ii], CL_TRUE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, NULL);
                     if ( err != CL_SUCCESS ){
+                        clReleaseMemObject(buffers[ii]);
+                        clReleaseMemObject(buffers[ii+1]);
                         align_free( outptr[i] );
                         print_error( err, " clWriteBuffer failed" );
                         return -1;
-                err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem),
-                                     (void *)&buffers[0]);
-                err |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem),
-                                      (void *)&buffers[1]);
+                err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
+                if ( err ){
+                    clReleaseMemObject(buffers[ii]);
+                    clReleaseMemObject(buffers[ii+1]);
+                    align_free( outptr[i] );
+                    log_error( " Error creating program for %s\n", type );
+                    return -1;
+                }
+                err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+                test_error( err, "Unable to get work group size to use" );
+                err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
+                err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
                 if ( err != CL_SUCCESS ){
+                    clReleaseMemObject( buffers[ii] );
+                    clReleaseMemObject( buffers[ii+1] );
+                    clReleaseKernel( kernel[i] );
+                    clReleaseProgram( program[i] );
                     align_free( outptr[i] );
                     print_error( err, " clSetKernelArg failed" );
                     return -1;
+                err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
                 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
                 if ( err != CL_SUCCESS ){
                     print_error( err, " clEnqueueNDRangeKernel failed" );
+                    clReleaseMemObject( buffers[ii] );
+                    clReleaseMemObject( buffers[ii+1] );
+                    clReleaseKernel( kernel[i] );
+                    clReleaseProgram( program[i] );
                     align_free( outptr[i] );
                     return -1;
-                err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
-                                          ptrSizes[i] * num_elements, outptr[i],
-                                          0, NULL, NULL);
+                if ( ! strcmp( type, "half" ) ){
+                    err = clEnqueueReadBuffer( queue, buffers[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
+                }
+                else{
+                    err = clEnqueueReadBuffer( queue, buffers[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
+                }
                 if ( err != CL_SUCCESS ){
+                    clReleaseMemObject( buffers[ii] );
+                    clReleaseMemObject( buffers[ii+1] );
+                    clReleaseKernel( kernel[i] );
+                    clReleaseProgram( program[i] );
                     align_free( outptr[i] );
                     print_error( err, " clEnqueueReadBuffer failed" );
                     return -1;
                 if ( fn( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){
-                    log_error(
-                        " %s%d test failed. cl_mem_flags src: %s dst: %s\n",
-                        type, 1 << i, flag_set_names[src_flag_id],
-                        flag_set_names[dst_flag_id]);
+                    log_error( " %s%d test failed\n", type, 1<<i );
-                    log_info(
-                        " %s%d test passed. cl_mem_flags src: %s dst: %s\n",
-                        type, 1 << i, flag_set_names[src_flag_id],
-                        flag_set_names[dst_flag_id]);
+                    log_info( " %s%d test passed\n", type, 1<<i );
                 // cleanup
+                clReleaseMemObject( buffers[ii] );
+                clReleaseMemObject( buffers[ii+1] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
         } // dst cl_mem_flag
@@ -791,16 +792,19 @@
 int test_buffer_write_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    cl_mem      buffers[10];
     void        *outptr[5];
     TestStruct  *inptr[5];
-    clProgramWrapper program[5];
-    clKernelWrapper kernel[5];
+    cl_program  program[5];
+    cl_kernel   kernel[5];
     size_t      ptrSizes[5];
     size_t      size = sizeof( TestStruct );
     size_t      global_work_size[3];
+    size_t      local_work_size[3];
     cl_int      err;
-    int i;
+    int         i, ii;
     cl_uint     j;
     int         loops = 1;      // no vector for structs
     int         src_flag_id, dst_flag_id;
@@ -817,25 +821,12 @@
     ptrSizes[3] = ptrSizes[2] << 1;
     ptrSizes[4] = ptrSizes[3] << 1;
-    loops = (loops < 5 ? loops : 5);
-    for (i = 0; i < loops; i++)
-    {
+    for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
+            log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
-        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
-                                          &struct_kernel_code,
-                                          "read_write_struct");
-        if (err)
-        {
-            log_error(" Error creating program for struct\n");
-            free_mtdata(d);
-            return -1;
-        }
-        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
-        {
-            for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
-            {
-                clMemWrapper buffers[2];
+            loops = ( loops < 5 ? loops : 5 );
+            for ( i = 0; i < loops; i++ ){
                 inptr[i] = (TestStruct *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
@@ -844,14 +835,11 @@
                     inptr[i][j].b = get_random_float( -FLT_MAX, FLT_MAX, d );
+                ii = i << 1;
                 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-                    buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],
-                                                ptrSizes[i] * num_elements,
-                                                inptr[i], &err);
+                    buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
-                    buffers[0] =
-                        clCreateBuffer(context, flag_set[src_flag_id],
-                                       ptrSizes[i] * num_elements, NULL, &err);
+                    buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
                 if ( err ){
                     align_free( outptr[i] );
                     print_error(err, " clCreateBuffer failed\n" );
@@ -860,15 +848,11 @@
                 outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
                 if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
-                    buffers[1] = clCreateBuffer(context, flag_set[dst_flag_id],
-                                                ptrSizes[i] * num_elements,
-                                                outptr[i], &err);
+                    buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * num_elements, outptr[i], &err);
-                    buffers[1] =
-                        clCreateBuffer(context, flag_set[dst_flag_id],
-                                       ptrSizes[i] * num_elements, NULL, &err);
-                if (!buffers[1] || err)
-                {
+                    buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
+                if ( ! buffers[ii+1] || err){
+                    clReleaseMemObject(buffers[ii]);
                     align_free( outptr[i] );
                     print_error(err, " clCreateBuffer failed\n" );
@@ -877,11 +861,11 @@
                 if (gTestMap) {
                     void *dataPtr;
-                    dataPtr = clEnqueueMapBuffer(
-                        queue, buffers[0], CL_TRUE, CL_MAP_WRITE, 0,
-                        ptrSizes[i] * num_elements, 0, NULL, NULL, &err);
+                    dataPtr = clEnqueueMapBuffer(queue, buffers[ii], CL_TRUE, CL_MAP_WRITE, 0, ptrSizes[i]*num_elements, 0, NULL, NULL, &err);
                     if (err) {
                         print_error(err, "clEnqueueMapBuffer failed");
+                        clReleaseMemObject(buffers[ii]);
+                        clReleaseMemObject(buffers[ii+1]);
                         align_free( outptr[i] );
                         return -1;
@@ -889,20 +873,21 @@
                     memcpy(dataPtr, inptr[i], ptrSizes[i]*num_elements);
-                    err = clEnqueueUnmapMemObject(queue, buffers[0], dataPtr, 0,
-                                                  NULL, NULL);
+                    err = clEnqueueUnmapMemObject(queue, buffers[ii], dataPtr, 0, NULL, NULL);
                     if (err) {
                         print_error(err, "clEnqueueUnmapMemObject failed");
+                        clReleaseMemObject(buffers[ii]);
+                        clReleaseMemObject(buffers[ii+1]);
                         align_free( outptr[i] );
                         return -1;
                 else if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
-                    err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
-                                               ptrSizes[i] * num_elements,
-                                               inptr[i], 0, NULL, NULL);
+                    err = clEnqueueWriteBuffer(queue, buffers[ii], CL_TRUE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, NULL);
                     if ( err != CL_SUCCESS ){
+                        clReleaseMemObject(buffers[ii]);
+                        clReleaseMemObject(buffers[ii+1]);
                         align_free( outptr[i] );
                         print_error( err, " clWriteBuffer failed" );
@@ -910,29 +895,56 @@
-                err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem),
-                                     (void *)&buffers[0]);
-                err |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem),
-                                      (void *)&buffers[1]);
+                err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &struct_kernel_code, "read_write_struct" );
+                if ( err ){
+                    clReleaseMemObject(buffers[ii]);
+                    clReleaseMemObject(buffers[ii+1]);
+                    align_free( outptr[i] );
+                    log_error( " Error creating program for struct\n" );
+                    free_mtdata(d);
+                    return -1;
+                }
+                err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+                test_error( err, "Unable to get work group size to use" );
+                err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
+                err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
                 if ( err != CL_SUCCESS ){
+                    clReleaseMemObject( buffers[ii] );
+                    clReleaseMemObject( buffers[ii+1] );
+                    clReleaseKernel( kernel[i] );
+                    clReleaseProgram( program[i] );
                     align_free( outptr[i] );
                     print_error( err, " clSetKernelArg failed" );
                     return -1;
+                err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
                 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
                 if ( err != CL_SUCCESS ){
                     print_error( err, " clEnqueueNDRangeKernel failed" );
+                    clReleaseMemObject( buffers[ii] );
+                    clReleaseMemObject( buffers[ii+1] );
+                    clReleaseKernel( kernel[i] );
+                    clReleaseProgram( program[i] );
                     align_free( outptr[i] );
                     return -1;
-                err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
-                                          ptrSizes[i] * num_elements, outptr[i],
-                                          0, NULL, NULL);
+                err = clEnqueueReadBuffer( queue, buffers[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
                 if ( err != CL_SUCCESS ){
+                    clReleaseMemObject( buffers[ii] );
+                    clReleaseMemObject( buffers[ii+1] );
+                    clReleaseKernel( kernel[i] );
+                    clReleaseProgram( program[i] );
                     align_free( outptr[i] );
                     print_error( err, " clEnqueueReadBuffer failed" );
@@ -940,19 +952,17 @@
                 if ( verify_write_struct( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){
-                    log_error(" buffer_WRITE struct%d test failed. "
-                              "cl_mem_flags src: %s dst: %s\n",
-                              1 << i, flag_set_names[src_flag_id],
-                              flag_set_names[dst_flag_id]);
+                    log_error( " buffer_WRITE struct%d test failed\n", 1<<i );
-                    log_info(" buffer_WRITE struct%d test passed. cl_mem_flags "
-                             "src: %s dst: %s\n",
-                             1 << i, flag_set_names[src_flag_id],
-                             flag_set_names[dst_flag_id]);
+                    log_info( " buffer_WRITE struct%d test passed\n", 1<<i );
                 // cleanup
+                clReleaseMemObject( buffers[ii] );
+                clReleaseMemObject( buffers[ii+1] );
+                clReleaseKernel( kernel[i] );
+                clReleaseProgram( program[i] );
                 align_free( outptr[i] );
                 align_free( (void *)inptr[i] );
@@ -976,6 +986,9 @@
     cl_event    event[2];
     size_t      ptrSizes[5];
     size_t      global_work_size[3];
+    size_t      local_work_size[3];
     cl_int      err;
     int         i, ii;
     int         src_flag_id, dst_flag_id;
@@ -1032,6 +1045,11 @@
                     return -1;
+                err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
+                test_error( err, "Unable to get work group size to use" );
                 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
                 err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
                 if ( err != CL_SUCCESS ){
@@ -1055,8 +1073,11 @@
                     return -1;
+                err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
                 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
                 if (err != CL_SUCCESS){
                     print_error( err, "clEnqueueNDRangeKernel failed" );
                     return -1;
@@ -1377,7 +1398,6 @@
 int test_buffer_write_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
     float   *inptr[5];
     size_t  ptrSizes[5];
     int     i, err;
@@ -1402,10 +1422,8 @@
             inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
-    err = test_buffer_write(deviceID, context, queue, num_elements,
-                            sizeof(cl_half), (char *)"half", 5, (void **)inptr,
-                            buffer_write_half_kernel_code, half_kernel_name,
-                            foo, d);
+    err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5, (void**)inptr,
+                             buffer_write_half_kernel_code, half_kernel_name, foo, d );
     for ( i = 0; i < 5; i++ ){
         align_free( (void *)inptr[i] );
diff --git a/test_conformance/buffers/test_image_migrate.cpp b/test_conformance/buffers/test_image_migrate.cpp
index dbdca9c..31bb0a2 100644
--- a/test_conformance/buffers/test_image_migrate.cpp
+++ b/test_conformance/buffers/test_image_migrate.cpp
@@ -345,9 +345,9 @@
             if ((err = clEnqueueNDRangeKernel(queues[i], kernel, 2, NULL, wgs, wls, 0, NULL, NULL)) != CL_SUCCESS) {
-                print_error(err, "Failed enqueuing the NDRange kernel.");
-                failed = 1;
-                goto cleanup;
+              print_error(err, "Failed enqueueing the NDRange kernel.");
+              failed = 1;
+              goto cleanup;
           // Verify the results as long as neither input is an undefined migration
diff --git a/test_conformance/c11_atomics/common.cpp b/test_conformance/c11_atomics/common.cpp
index 668d7b5..bebad89 100644
--- a/test_conformance/c11_atomics/common.cpp
+++ b/test_conformance/c11_atomics/common.cpp
@@ -44,12 +44,16 @@
   switch (scopeType)
-      case MEMORY_SCOPE_EMPTY: return "";
-      case MEMORY_SCOPE_WORK_GROUP: return "memory_scope_work_group";
-      case MEMORY_SCOPE_DEVICE: return "memory_scope_device";
-      case MEMORY_SCOPE_ALL_DEVICES: return "memory_scope_all_devices";
-      case MEMORY_SCOPE_ALL_SVM_DEVICES: return "memory_scope_all_svm_devices";
-      default: return 0;
+    return "";
+    return "memory_scope_work_group";
+    return "memory_scope_device";
+    return "memory_scope_all_svm_devices";
+  default:
+    return 0;
@@ -202,80 +206,3 @@
 template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MaxValue() {return CL_ULONG_MAX;}
 template<> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue() {return CL_FLT_MAX;}
 template<> cl_double AtomicTypeExtendedInfo<cl_double>::MaxValue() {return CL_DBL_MAX;}
-cl_int getSupportedMemoryOrdersAndScopes(
-    cl_device_id device, std::vector<TExplicitMemoryOrderType> &memoryOrders,
-    std::vector<TExplicitMemoryScopeType> &memoryScopes)
-    // The CL_DEVICE_ATOMIC_MEMORY_CAPABILITES is missing before 3.0, but since
-    // all orderings and scopes are required for 2.X devices and this test is
-    // skipped before 2.0 we can safely return all orderings and scopes if the
-    // device is 2.X. Query device for the supported orders.
-    if (get_device_cl_version(device) < Version{ 3, 0 })
-    {
-        memoryOrders.push_back(MEMORY_ORDER_EMPTY);
-        memoryOrders.push_back(MEMORY_ORDER_RELAXED);
-        memoryOrders.push_back(MEMORY_ORDER_ACQUIRE);
-        memoryOrders.push_back(MEMORY_ORDER_RELEASE);
-        memoryOrders.push_back(MEMORY_ORDER_ACQ_REL);
-        memoryOrders.push_back(MEMORY_ORDER_SEQ_CST);
-        memoryScopes.push_back(MEMORY_SCOPE_EMPTY);
-        memoryScopes.push_back(MEMORY_SCOPE_WORK_GROUP);
-        memoryScopes.push_back(MEMORY_SCOPE_DEVICE);
-        memoryScopes.push_back(MEMORY_SCOPE_ALL_SVM_DEVICES);
-        return CL_SUCCESS;
-    }
-    // For a 3.0 device we can query the supported orderings and scopes
-    // directly.
-    cl_device_atomic_capabilities atomic_capabilities{};
-    test_error(
-        clGetDeviceInfo(device, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
-                        sizeof(atomic_capabilities), &atomic_capabilities,
-                        nullptr),
-        "clGetDeviceInfo failed for CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES\n");
-    // Provided we succeeded, we can start filling the vectors.
-    if (atomic_capabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED)
-    {
-        memoryOrders.push_back(MEMORY_ORDER_RELAXED);
-    }
-    if (atomic_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL)
-    {
-        memoryOrders.push_back(MEMORY_ORDER_ACQUIRE);
-        memoryOrders.push_back(MEMORY_ORDER_RELEASE);
-        memoryOrders.push_back(MEMORY_ORDER_ACQ_REL);
-    }
-    if (atomic_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST)
-    {
-        // The functions not ending in explicit have the same semantics as the
-        // corresponding explicit function with memory_order_seq_cst for the
-        // memory_order argument.
-        memoryOrders.push_back(MEMORY_ORDER_EMPTY);
-        memoryOrders.push_back(MEMORY_ORDER_SEQ_CST);
-    }
-    if (atomic_capabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP)
-    {
-        memoryScopes.push_back(MEMORY_SCOPE_WORK_GROUP);
-    }
-    if (atomic_capabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE)
-    {
-        // The functions that do not have memory_scope argument have the same
-        // semantics as the corresponding functions with the memory_scope
-        // argument set to memory_scope_device.
-        memoryScopes.push_back(MEMORY_SCOPE_EMPTY);
-        memoryScopes.push_back(MEMORY_SCOPE_DEVICE);
-    }
-    if (atomic_capabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES)
-    {
-        // OpenCL 3.0 added memory_scope_all_devices as an alias for
-        // memory_scope_all_svm_devices, so test both.
-        memoryScopes.push_back(MEMORY_SCOPE_ALL_DEVICES);
-        memoryScopes.push_back(MEMORY_SCOPE_ALL_SVM_DEVICES);
-    }
-    return CL_SUCCESS;
diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h
index bbcc68c..c45e1aa 100644
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -35,26 +35,25 @@
 enum TExplicitAtomicType
 enum TExplicitMemoryScopeType
 extern bool gHost; // temporary flag for testing native host threads (test verification)
@@ -66,16 +65,10 @@
 extern bool gDebug; // print OpenCL kernel code
 extern int gInternalIterations; // internal test iterations for atomic operation, sufficient to verify atomicity
 extern int gMaxDeviceThreads; // maximum number of threads executed on OCL device
-extern cl_device_atomic_capabilities gAtomicMemCap,
-    gAtomicFenceCap; // atomic memory and fence capabilities for this device
 extern const char *get_memory_order_type_name(TExplicitMemoryOrderType orderType);
 extern const char *get_memory_scope_type_name(TExplicitMemoryScopeType scopeType);
-extern cl_int getSupportedMemoryOrdersAndScopes(
-    cl_device_id device, std::vector<TExplicitMemoryOrderType> &memoryOrders,
-    std::vector<TExplicitMemoryScopeType> &memoryScopes);
 class AtomicTypeInfo
@@ -288,89 +281,6 @@
       return 0;
-  int CheckCapabilities(TExplicitMemoryScopeType memoryScope,
-                        TExplicitMemoryOrderType memoryOrder)
-  {
-      /*
-          Differentiation between atomic fence and other atomic operations
-          does not need to occur here.
-          The initialisation of this test checks that the minimum required
-          capabilities are supported by this device.
-          The following switches allow the test to skip if optional capabilites
-          are not supported by the device.
-        */
-      switch (memoryScope)
-      {
-          case MEMORY_SCOPE_EMPTY: {
-              break;
-          }
-          case MEMORY_SCOPE_WORK_GROUP: {
-              if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) == 0)
-              {
-                  return TEST_SKIPPED_ITSELF;
-              }
-              break;
-          }
-          case MEMORY_SCOPE_DEVICE: {
-              if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0)
-              {
-                  return TEST_SKIPPED_ITSELF;
-              }
-              break;
-          }
-          case MEMORY_SCOPE_ALL_DEVICES: // fallthough
-              if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) == 0)
-              {
-                  return TEST_SKIPPED_ITSELF;
-              }
-              break;
-          }
-          default: {
-              log_info("Invalid memory scope\n");
-              break;
-          }
-      }
-      switch (memoryOrder)
-      {
-          case MEMORY_ORDER_EMPTY: {
-              break;
-          }
-          case MEMORY_ORDER_RELAXED: {
-              if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_RELAXED) == 0)
-              {
-                  return TEST_SKIPPED_ITSELF;
-              }
-              break;
-          }
-          case MEMORY_ORDER_ACQUIRE:
-          case MEMORY_ORDER_RELEASE:
-          case MEMORY_ORDER_ACQ_REL: {
-              if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0)
-              {
-                  return TEST_SKIPPED_ITSELF;
-              }
-              break;
-          }
-          case MEMORY_ORDER_SEQ_CST: {
-              if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) == 0)
-              {
-                  return TEST_SKIPPED_ITSELF;
-              }
-              break;
-          }
-          default: {
-              log_info("Invalid memory order\n");
-              break;
-          }
-      }
-      return 0;
-  }
   virtual bool SVMDataBufferAllSVMConsistent() {return false;}
   bool UseSVM() {return _useSVM;}
   void StartValue(HostDataType startValue) {_startValue = startValue;}
@@ -429,7 +339,6 @@
   using CBasicTest<HostAtomicType, HostDataType>::LocalMemory;
   using CBasicTest<HostAtomicType, HostDataType>::MaxGroupSize;
-  using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
   CBasicTestMemOrderScope(TExplicitAtomicType dataType, bool useSVM = false) : CBasicTest<HostAtomicType, HostDataType>(dataType, useSVM)
@@ -480,10 +389,6 @@
       MaxGroupSize(16); // increase number of groups by forcing smaller group size
       MaxGroupSize(0); // group size limited by device capabilities
-    if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
-        return 0; // skip test - not applicable
     return CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
   virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue)
@@ -493,11 +398,16 @@
     std::vector<TExplicitMemoryScopeType> memoryScope;
     int error = 0;
-    // For OpenCL-3.0 and later some orderings and scopes are optional, so here
-    // we query for the supported ones.
-    test_error_ret(
-        getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, memoryScope),
-        "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL);
+    memoryOrder.push_back(MEMORY_ORDER_EMPTY);
+    memoryOrder.push_back(MEMORY_ORDER_RELAXED);
+    memoryOrder.push_back(MEMORY_ORDER_ACQUIRE);
+    memoryOrder.push_back(MEMORY_ORDER_RELEASE);
+    memoryOrder.push_back(MEMORY_ORDER_ACQ_REL);
+    memoryOrder.push_back(MEMORY_ORDER_SEQ_CST);
+    memoryScope.push_back(MEMORY_SCOPE_EMPTY);
+    memoryScope.push_back(MEMORY_SCOPE_WORK_GROUP);
+    memoryScope.push_back(MEMORY_SCOPE_DEVICE);
+    memoryScope.push_back(MEMORY_SCOPE_ALL_SVM_DEVICES);
     for(unsigned oi = 0; oi < memoryOrder.size(); oi++)
@@ -540,17 +450,11 @@
   virtual cl_uint MaxHostThreads()
-      // block host threads execution for memory scope different than
-      // memory_scope_all_svm_devices
-      if (MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
-          || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES || gHost)
-      {
-          return CBasicTest<HostAtomicType, HostDataType>::MaxHostThreads();
-      }
-      else
-      {
-          return 0;
-      }
+    // host threads may run only for memory_scope_all_svm_devices (or when gHost is set); for any other scope they are blocked
+    if(MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES || gHost)
+      return CBasicTest<HostAtomicType, HostDataType>::MaxHostThreads();
+    else
+      return 0;
   TExplicitMemoryOrderType _memoryOrder;
@@ -566,8 +470,6 @@
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderStr;
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
-  using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
   CBasicTestMemOrder2Scope(TExplicitAtomicType dataType, bool useSVM = false) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
@@ -589,11 +491,16 @@
     std::vector<TExplicitMemoryScopeType> memoryScope;
     int error = 0;
-    // For OpenCL-3.0 and later some orderings and scopes are optional, so here
-    // we query for the supported ones.
-    test_error_ret(
-        getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, memoryScope),
-        "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL);
+    memoryOrder.push_back(MEMORY_ORDER_EMPTY);
+    memoryOrder.push_back(MEMORY_ORDER_RELAXED);
+    memoryOrder.push_back(MEMORY_ORDER_ACQUIRE);
+    memoryOrder.push_back(MEMORY_ORDER_RELEASE);
+    memoryOrder.push_back(MEMORY_ORDER_ACQ_REL);
+    memoryOrder.push_back(MEMORY_ORDER_SEQ_CST);
+    memoryScope.push_back(MEMORY_SCOPE_EMPTY);
+    memoryScope.push_back(MEMORY_SCOPE_WORK_GROUP);
+    memoryScope.push_back(MEMORY_SCOPE_DEVICE);
+    memoryScope.push_back(MEMORY_SCOPE_ALL_SVM_DEVICES);
     for(unsigned oi = 0; oi < memoryOrder.size(); oi++)
@@ -610,15 +517,6 @@
-          if (CheckCapabilities(MemoryScope(), MemoryOrder())
-              == TEST_SKIPPED_ITSELF)
-              continue; // skip test - not applicable
-          if (CheckCapabilities(MemoryScope(), MemoryOrder2())
-              == TEST_SKIPPED_ITSELF)
-              continue; // skip test - not applicable
           EXECUTE_TEST(error, (CBasicTest<HostAtomicType, HostDataType>::ExecuteForEachParameterSet(deviceID, context, queue)));
@@ -802,35 +700,23 @@
-      // memory_order_relaxed is sufficient for these initialization operations
-      // as the barrier below will act as a fence, providing an order to the
-      // operations. memory_scope_work_group is sufficient as local memory is
-      // only visible within the work-group.
-      code += R"(
-              // initialize atomics not reachable from host (first thread
-              // is doing this, other threads are waiting on barrier)
-              if(get_local_id(0) == 0)
-                for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++)
-                {)";
-      if (aTypeName == "atomic_flag")
-      {
-          code += R"(
-                  if(finalDest[dstItemIdx])
-                    atomic_flag_test_and_set_explicit(destMemory+dstItemIdx,
-                                                      memory_order_relaxed,
-                                                      memory_scope_work_group);
-                  else
-                    atomic_flag_clear_explicit(destMemory+dstItemIdx,
-                                               memory_order_relaxed,
-                                               memory_scope_work_group);)";
-      }
+    code +=
+      "  // initialize atomics not reachable from host (first thread is doing this, other threads are waiting on barrier)\n"
+      "  if(get_local_id(0) == 0)\n"
+      "    for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++)\n"
+      "    {\n";
+    if(aTypeName == "atomic_flag")
+    {
+      code +=
+        "      if(finalDest[dstItemIdx])\n"
+        "        atomic_flag_test_and_set(destMemory+dstItemIdx);\n"
+        "      else\n"
+        "        atomic_flag_clear(destMemory+dstItemIdx);\n";
+    }
-        code += R"(
-                atomic_store_explicit(destMemory+dstItemIdx,
-                                      finalDest[dstItemIdx],
-                                      memory_order_relaxed,
-                                      memory_scope_work_group);)";
+      code +=
+        "      atomic_store(destMemory+dstItemIdx, finalDest[dstItemIdx]);\n";
     code +=
       "    }\n"
@@ -887,29 +773,20 @@
         "  if(get_local_id(0) == 0) // first thread in workgroup\n";
       // global atomics declared in program scope
-      code += R"(
-                if(atomic_fetch_add_explicit(&finishedThreads, 1u,
-                                           memory_order_relaxed,
-                                           memory_scope_work_group)
-                   == get_global_size(0)-1) // last finished thread
-                   )";
+      code +=
+      "  if(atomic_fetch_add(&finishedThreads, 1) == get_global_size(0)-1)\n"
+      "    // last finished thread\n";
     code +=
         "    for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++)\n";
     if(aTypeName == "atomic_flag")
-        code += R"(
-                finalDest[dstItemIdx] =
-                    atomic_flag_test_and_set_explicit(destMemory+dstItemIdx,
-                                                      memory_order_relaxed,
-                                                      memory_scope_work_group);)";
+      code +=
+        "      finalDest[dstItemIdx] = atomic_flag_test_and_set(destMemory+dstItemIdx);\n";
-        code += R"(
-                finalDest[dstItemIdx] =
-                    atomic_load_explicit(destMemory+dstItemIdx,
-                                         memory_order_relaxed,
-                                         memory_scope_work_group);)";
+      code +=
+        "      finalDest[dstItemIdx] = atomic_load(destMemory+dstItemIdx);\n";
   code += "}\n"
@@ -971,76 +848,50 @@
   if(deviceThreadCount > 0)
-      // This loop iteratively reduces the workgroup size by 2 and then
-      // re-generates the kernel with the reduced
-      // workgroup size until we find a size which is admissible for the kernel
-      // being run or reduce the wg size
-      // to the trivial case of 1 (which was separately verified to be accurate
-      // for the kernel being run)
+    cl_ulong usedLocalMemory;
+    cl_ulong totalLocalMemory;
+    cl_uint maxWorkGroupSize;
-      while ((CurrentGroupSize() > 1))
-      {
-          // Re-generate the kernel code with the current group size
-          if (kernel) clReleaseKernel(kernel);
-          if (program) clReleaseProgram(program);
-          programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems)
-              + FunctionCode() + KernelCode(numDestItems);
-          programLine = programSource.c_str();
-          if (create_single_kernel_helper_with_build_options(
-                  context, &program, &kernel, 1, &programLine,
-                  "test_atomic_kernel", gOldAPI ? "" : nullptr))
-          {
-              return -1;
-          }
-          // Get work group size for the new kernel
-          error = clGetKernelWorkGroupInfo(kernel, deviceID,
-                                           CL_KERNEL_WORK_GROUP_SIZE,
-                                           sizeof(groupSize), &groupSize, NULL);
-          test_error(error,
-                     "Unable to obtain max work group size for device and "
-                     "kernel combo");
+    // Set up the kernel code
+    programSource = PragmaHeader(deviceID)+ProgramHeader(numDestItems)+FunctionCode()+KernelCode(numDestItems);
+    programLine = programSource.c_str();
+    if(create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &programLine, "test_atomic_kernel",
+      gOldAPI ? "" : "-cl-std=CL2.0"))
+    {
+      return -1;
+    }
+    if(gDebug)
+    {
+      log_info("Program source:\n");
+      log_info("%s\n", programLine);
+    }
+    // tune up work sizes based on kernel info
+    error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(groupSize), &groupSize, NULL);
+    test_error(error, "Unable to obtain max work group size for device and kernel combo");
-          if (LocalMemory())
-          {
-              cl_ulong usedLocalMemory;
-              cl_ulong totalLocalMemory;
-              cl_uint maxWorkGroupSize;
+    if(LocalMemory())
+    {
+      error = clGetKernelWorkGroupInfo (kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(usedLocalMemory), &usedLocalMemory, NULL);
+      test_error(error, "clGetKernelWorkGroupInfo failed");
-              error = clGetKernelWorkGroupInfo(
-                  kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE,
-                  sizeof(usedLocalMemory), &usedLocalMemory, NULL);
-              test_error(error, "clGetKernelWorkGroupInfo failed");
+      error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(totalLocalMemory), &totalLocalMemory, NULL);
+      test_error(error, "clGetDeviceInfo failed");
-              error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE,
-                                      sizeof(totalLocalMemory),
-                                      &totalLocalMemory, NULL);
-              test_error(error, "clGetDeviceInfo failed");
+      // We know that each work-group is going to use typeSize * deviceThreadCount bytes of local memory
+      // so pick the maximum value for deviceThreadCount that uses all the local memory.
+      maxWorkGroupSize = ((totalLocalMemory - usedLocalMemory) / typeSize);
-              // We know that each work-group is going to use typeSize *
-              // deviceThreadCount bytes of local memory
-              // so pick the maximum value for deviceThreadCount that uses all
-              // the local memory.
-              maxWorkGroupSize =
-                  ((totalLocalMemory - usedLocalMemory) / typeSize);
+      if(maxWorkGroupSize < groupSize)
+        groupSize = maxWorkGroupSize;
+    }
-              if (maxWorkGroupSize < groupSize) groupSize = maxWorkGroupSize;
-          }
-          if (CurrentGroupSize() <= groupSize)
-              break;
-          else
-              CurrentGroupSize(CurrentGroupSize() / 2);
-      }
+    CurrentGroupSize((cl_uint)groupSize);
     if(CurrentGroupSize() > deviceThreadCount)
     if(CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI)
       deviceThreadCount = CurrentGroupSize()*CurrentGroupNum(deviceThreadCount);
     threadCount = deviceThreadCount+hostThreadCount;
-  if (gDebug)
-  {
-      log_info("Program source:\n");
-      log_info("%s\n", programLine);
-  }
   if(deviceThreadCount > 0)
     log_info("\t\t(thread count %u, group size %u)\n", deviceThreadCount, CurrentGroupSize());
   if(hostThreadCount > 0)
@@ -1048,7 +899,7 @@
-  // Generate ref data if we have a ref generator provided
+  // Generate ref data if we have a ref generator provided		
   d = init_genrand(gRandomSeed);
   if(GenerateRefs(threadCount, &startRefValues[0], d))
@@ -1085,13 +936,11 @@
       return -1;
     memcpy(svmAtomicBuffer, &destItems[0], typeSize * numDestItems);
-    streams[0] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
-                                typeSize * numDestItems, svmAtomicBuffer, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), typeSize * numDestItems, svmAtomicBuffer, NULL);
-      streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                  typeSize * numDestItems, &destItems[0], NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * numDestItems, &destItems[0], NULL);
   if (!streams[0])
@@ -1112,18 +961,12 @@
       memcpy(svmDataBuffer, &startRefValues[0], typeSize*threadCount*NumNonAtomicVariablesPerThread());
-    streams[1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
-                                typeSize * threadCount
-                                    * NumNonAtomicVariablesPerThread(),
-                                svmDataBuffer, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), typeSize*threadCount*NumNonAtomicVariablesPerThread(), svmDataBuffer, NULL);
-      streams[1] = clCreateBuffer(
-          context,
-          ((startRefValues.size() ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE)),
-          typeSize * threadCount * NumNonAtomicVariablesPerThread(),
-          startRefValues.size() ? &startRefValues[0] : 0, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)((startRefValues.size() ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE)),
+      typeSize * threadCount*NumNonAtomicVariablesPerThread(), startRefValues.size() ? &startRefValues[0] : 0, NULL);
   if (!streams[1])
diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp
index 3132c40..687861e 100644
--- a/test_conformance/c11_atomics/main.cpp
+++ b/test_conformance/c11_atomics/main.cpp
@@ -26,8 +26,6 @@
 bool gDebug = false; // always print OpenCL kernel code
 int gInternalIterations = 10000; // internal test iterations for atomic operation, sufficient to verify atomicity
 int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device
-cl_device_atomic_capabilities gAtomicMemCap,
-    gAtomicFenceCap; // atomic memory and fence capabilities for this device
 extern int test_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int test_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
@@ -110,97 +108,11 @@
 test_status InitCL(cl_device_id device) {
     auto version = get_device_cl_version(device);
     auto expected_min_version = Version(2, 0);
     if (version < expected_min_version)
-        version_expected_info("Test", "OpenCL",
-                              expected_min_version.to_string().c_str(),
-                              version.to_string().c_str());
+        version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
         return TEST_SKIP;
-    if (version >= Version(3, 0))
-    {
-        cl_int error;
-        error = clGetDeviceInfo(device, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
-                                sizeof(gAtomicMemCap), &gAtomicMemCap, NULL);
-        if (error != CL_SUCCESS)
-        {
-            print_error(error, "Unable to get atomic memory capabilities\n");
-            return TEST_FAIL;
-        }
-        error =
-            clGetDeviceInfo(device, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES,
-                            sizeof(gAtomicFenceCap), &gAtomicFenceCap, NULL);
-        if (error != CL_SUCCESS)
-        {
-            print_error(error, "Unable to get atomic fence capabilities\n");
-            return TEST_FAIL;
-        }
-        if ((gAtomicFenceCap
-            == 0)
-        {
-            log_info(
-                "Minimum atomic fence capabilities unsupported by device\n");
-            return TEST_FAIL;
-        }
-        if ((gAtomicMemCap
-            == 0)
-        {
-            log_info(
-                "Minimum atomic memory capabilities unsupported by device\n");
-            return TEST_FAIL;
-        }
-        // Disable program scope global variable testing in the case that it is
-        // not supported on an OpenCL-3.0 driver.
-        size_t max_global_variable_size{};
-        test_error_ret(clGetDeviceInfo(device,
-                                       CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE,
-                                       sizeof(max_global_variable_size),
-                                       &max_global_variable_size, nullptr),
-                       "Unable to get max global variable size\n", TEST_FAIL);
-        if (0 == max_global_variable_size)
-        {
-            gNoGlobalVariables = true;
-        }
-        // Disable generic address space testing in the case that it is not
-        // supported on an OpenCL-3.0 driver.
-        cl_bool generic_address_space_support{};
-        test_error_ret(
-            clGetDeviceInfo(device, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT,
-                            sizeof(generic_address_space_support),
-                            &generic_address_space_support, nullptr),
-            "Unable to get generic address space support\n", TEST_FAIL);
-        if (CL_FALSE == generic_address_space_support)
-        {
-            gNoGenericAddressSpace = true;
-        }
-    }
-    else
-    {
-        // OpenCL 2.x device, default to all capabilities
-    }
     return TEST_PASS;
diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index c3a190b..f12f955 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -29,9 +29,7 @@
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
-  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
-  using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
   CBasicTestStore(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
@@ -45,10 +43,6 @@
     if(MemoryOrder() == MEMORY_ORDER_ACQUIRE ||
       MemoryOrder() == MEMORY_ORDER_ACQ_REL)
       return 0; //skip test - not applicable
-    if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
-        return 0; // skip test - not applicable
     return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
   virtual std::string ProgramCore()
@@ -204,10 +198,7 @@
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
-  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
-  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
-  using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
   CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
@@ -221,27 +212,15 @@
     if(MemoryOrder() == MEMORY_ORDER_RELEASE ||
       MemoryOrder() == MEMORY_ORDER_ACQ_REL)
       return 0; //skip test - not applicable
-    if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
-        return 0; // skip test - not applicable
     return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
   virtual std::string ProgramCore()
-      // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store
-      // should be MEMORY_ORDER_RELEASE
-      std::string memoryOrderScopeLoad = MemoryOrderScopeStr();
-      std::string memoryOrderScopeStore =
-          (MemoryOrder() == MEMORY_ORDER_ACQUIRE)
-          ? (", memory_order_release" + MemoryScopeStr())
-          : memoryOrderScopeLoad;
-      std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit");
-      return "  atomic_store" + postfix + "(&destMemory[tid], tid"
-          + memoryOrderScopeStore
-          + ");\n"
-            "  oldValues[tid] = atomic_load"
-          + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n";
+    std::string memoryOrderScope = MemoryOrderScopeStr();
+    std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+    return
+      "  atomic_store(&destMemory[tid], tid);\n"
+      "  oldValues[tid] = atomic_load"+postfix+"(&destMemory[tid]"+memoryOrderScope+");\n";
   virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
@@ -456,11 +435,9 @@
   using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder;
   using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder2;
   using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrderScope;
-  using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryScope;
   using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::DataType;
   using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::Iterations;
   using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::IterationsStr;
-  using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
   CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>(dataType, useSVM)
@@ -474,13 +451,6 @@
     if((MemoryOrder() == MEMORY_ORDER_RELAXED && MemoryOrder2() != MEMORY_ORDER_RELAXED) ||
       (MemoryOrder() != MEMORY_ORDER_SEQ_CST && MemoryOrder2() == MEMORY_ORDER_SEQ_CST))
       return 0; // failure argument shall be no stronger than the success
-    if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
-        return 0; // skip test - not applicable
-    if (CheckCapabilities(MemoryScope(), MemoryOrder2()) == TEST_SKIPPED_ITSELF)
-        return 0; // skip test - not applicable
     return CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
   virtual std::string ProgramCore()
@@ -1624,30 +1594,6 @@
       orderStr = std::string(", ") + get_memory_order_type_name(MemoryOrderForClear());
     return orderStr + MemoryScopeStr();
-  virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
-                                cl_command_queue queue)
-  {
-      // This test assumes support for the memory_scope_device scope in the case
-      // that LocalMemory() == false. Therefore we should skip this test in that
-      // configuration on a 3.0 driver since supporting the memory_scope_device
-      // scope is optionaly.
-      if (get_device_cl_version(deviceID) >= Version{ 3, 0 })
-      {
-          if (!LocalMemory()
-              && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE))
-          {
-              log_info(
-                  "Skipping atomic_flag test due to use of atomic_scope_device "
-                  "which is optionally not supported on this device\n");
-              return 0; // skip test - not applicable
-          }
-      }
-      return CBasicTestMemOrderScope<HostAtomicType,
-                                     HostDataType>::ExecuteSingleTest(deviceID,
-                                                                      context,
-                                                                      queue);
-  }
   virtual std::string ProgramCore()
     std::string memoryOrderScope = MemoryOrderScopeStr();
@@ -1843,11 +1789,7 @@
   virtual bool SVMDataBufferAllSVMConsistent()
-      // Although memory_scope_all_devices doesn't mention SVM it is just an
-      // alias for memory_scope_all_svm_devices.  So both scopes interact with
-      // SVM allocations, on devices that support those, just the same.
-      return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
-          || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES;
+    return MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES;
   virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue)
diff --git a/test_conformance/clcpp/CMakeLists.txt b/test_conformance/clcpp/CMakeLists.txt
new file mode 100644
index 0000000..04484e7
--- /dev/null
+++ b/test_conformance/clcpp/CMakeLists.txt
@@ -0,0 +1,21 @@
diff --git a/test_conformance/clcpp/address_spaces/CMakeLists.txt b/test_conformance/clcpp/address_spaces/CMakeLists.txt
new file mode 100644
index 0000000..2b6369f
--- /dev/null
+++ b/test_conformance/clcpp/address_spaces/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/address_spaces/common.hpp b/test_conformance/clcpp/address_spaces/common.hpp
new file mode 100644
index 0000000..47b78ea
--- /dev/null
+++ b/test_conformance/clcpp/address_spaces/common.hpp
@@ -0,0 +1,203 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+    last_error = run_address_spaces_test(  \
+        device, context, queue, n_elems, TEST_CLASS \
+    );  \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+// This is a base class for address spaces tests.
+template <class T>
+struct address_spaces_test : public detail::base_func_type<T>
+    // output buffer type
+    typedef T type;
+    virtual ~address_spaces_test() {};
+    // Returns test name
+    virtual std::string str() = 0;
+    // Returns OpenCL program source
+    virtual std::string generate_program() = 0;
+    // Returns kernel names IN ORDER
+    virtual std::vector<std::string> get_kernel_names()
+    {
+        // Typical case, that is, only one kernel
+        return { this->get_kernel_name() };
+    }
+    // Return value that is expected to be in output_buffer[i]
+    virtual T operator()(size_t i, size_t work_group_size) = 0;
+    // Returns true if the local size has to be set in
+    // clEnqueueNDRangeKernel(); otherwise returns false.
+    virtual bool set_local_size()
+    {
+        return false;
+    }
+    // Calculates maximal work-group size (one dim)
+    virtual size_t get_max_local_size(const std::vector<cl_kernel>& kernels,
+                                      cl_device_id device,
+                                      size_t work_group_size, // default work-group size
+                                      cl_int& error)
+    {
+        size_t wg_size = work_group_size;
+        for(auto&k : kernels)
+        {
+            size_t max_wg_size;
+            error = clGetKernelWorkGroupInfo(k, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL);
+            RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+            wg_size = (std::min)(max_wg_size, wg_size);
+        }
+        return wg_size;
+    }
+    // This covers the typical case: each kernel is executed once, and every
+    // kernel has only one argument, which is the output buffer.
+    virtual cl_int execute(const std::vector<cl_kernel>& kernels,
+                           cl_mem& output_buffer,
+                           cl_command_queue& queue,
+                           size_t work_size,
+                           size_t work_group_size)
+    {
+        cl_int err;
+        for(auto& k : kernels)
+        {
+            err = clSetKernelArg(k, 0, sizeof(output_buffer), &output_buffer);
+            RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+            err = clEnqueueNDRangeKernel(
+                queue, k, 1,
+                NULL, &work_size, this->set_local_size() ? &work_group_size : NULL,
+                0, NULL, NULL
+            );
+            RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+        }
+        return err;
+    }
+template <class address_spaces_test>
+int run_address_spaces_test(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, address_spaces_test op)
+    cl_mem buffers[1];
+    cl_program program;
+    std::vector<cl_kernel> kernels;
+    size_t wg_size;
+    size_t work_size[1];
+    cl_int err;
+    typedef typename address_spaces_test::type TYPE;
+    // Don't run test for unsupported types
+    if(!(type_supported<TYPE>(device)))
+    {
+        return CL_SUCCESS;
+    }
+    std::string code_str = op.generate_program();
+    std::vector<std::string> kernel_names = op.get_kernel_names();
+    if(kernel_names.empty())
+    {
+        RETURN_ON_ERROR_MSG(-1, "No kernel to run");
+    }
+    kernels.resize(kernel_names.size());
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]);
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], "-cl-std=CL2.0", false);
+    for(size_t i = 1; i < kernels.size(); i++)
+    {
+        kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
+        RETURN_ON_CL_ERROR(err, "clCreateKernel");
+    }
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]);
+    for(size_t i = 1; i < kernels.size(); i++)
+    {
+        kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
+        RETURN_ON_CL_ERROR(err, "clCreateKernel");
+    }
+    // Find the max possible wg size among all the kernels
+    wg_size = op.get_max_local_size(kernels, device, 1024, err);
+    RETURN_ON_ERROR(err);
+    work_size[0] = count;
+    if(op.set_local_size())
+    {
+        size_t wg_number = static_cast<size_t>(
+            std::ceil(static_cast<double>(count) / wg_size)
+        );
+        work_size[0] = wg_number * wg_size;
+    }
+    // output on host
+    std::vector<TYPE> output = generate_output<TYPE>(work_size[0], 9999);
+    // output buffer
+    buffers[0] = clCreateBuffer
+        (context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(TYPE) * output.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+    // Execute test
+    err = op.execute(kernels, buffers[0], queue, work_size[0], wg_size);
+    err = clEnqueueReadBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(TYPE) * output.size(),
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    for(size_t i = 0; i < output.size(); i++)
+    {
+        TYPE v = op(i, wg_size);
+        if(!(are_equal(v, output[i], detail::make_value<TYPE>(0), op)))
+        {
+            RETURN_ON_ERROR_MSG(-1,
+                "test_%s(%s) failed. Expected: %s, got: %s", op.str().c_str(), type_name<TYPE>().c_str(),
+                format_value(v).c_str(), format_value(output[i]).c_str()
+            );
+        }
+    }
+    log_info("test_%s(%s) passed\n", op.str().c_str(), type_name<TYPE>().c_str());
+    clReleaseMemObject(buffers[0]);
+    for(auto& k : kernels)
+        clReleaseKernel(k);
+    clReleaseProgram(program);
+    return err;
diff --git a/test_conformance/clcpp/address_spaces/main.cpp b/test_conformance/clcpp/address_spaces/main.cpp
new file mode 100644
index 0000000..3bda012
--- /dev/null
+++ b/test_conformance/clcpp/address_spaces/main.cpp
@@ -0,0 +1,25 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "test_pointer_types.hpp"
+#include "test_storage_types.hpp"
+int main(int argc, const char *argv[])
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(argc, argv, tests.size(),, false, false, 0);
diff --git a/test_conformance/clcpp/address_spaces/test_pointer_types.hpp b/test_conformance/clcpp/address_spaces/test_pointer_types.hpp
new file mode 100644
index 0000000..edc50b6
--- /dev/null
+++ b/test_conformance/clcpp/address_spaces/test_pointer_types.hpp
@@ -0,0 +1,411 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include "common.hpp"
+// ----------------------------
+// ---------- PRIVATE
+// ----------------------------
+template <class T>
+struct private_pointer_test : public address_spaces_test<T>
+    std::string str()
+    {
+        return "private_pointer";
+    }
+    T operator()(size_t i, size_t work_group_size)
+    {
+        typedef typename scalar_type<T>::type SCALAR;
+        (void) work_group_size;
+        return detail::make_value<T>(static_cast<SCALAR>(i));
+    }
+    // Each work-item writes its global id to output[work-item-global-id]
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) 
+            return 
+                "__kernel void " + this->get_kernel_name() + "(global " + type_name<T>() + " *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + type_name<T>() + ")(gid);\n"
+                "}\n";
+        #else
+            return         
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name<T>() + "[]> output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    typedef " + type_name<T>() + " TYPE;\n"
+                "    TYPE v = TYPE(gid);\n"
+                "    private_ptr<TYPE> v_ptr1(dynamic_asptr_cast<private_ptr<TYPE>>(&v));\n"
+                "    private_ptr<TYPE> v_ptr2(v_ptr1);\n"
+                "    TYPE a[] = { TYPE(0), TYPE(1) };\n"
+                "    private_ptr<TYPE> a_ptr = dynamic_asptr_cast<private_ptr<TYPE>>(a);\n"
+                "    a_ptr++;\n"
+                "    TYPE * a_ptr2 = a_ptr.get();\n"
+                "    *a_ptr2 = *v_ptr2;\n"
+                "    output[gid] = a[1];\n"
+                "}\n";        
+        #endif
+    }
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    // private pointer
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test<cl_uint16>());
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
+// ----------------------------
+// ---------- LOCAL
+// ----------------------------
+template <class T>
+struct local_pointer_test : public address_spaces_test<T>
+    std::string str()
+    {
+        return "local_pointer";
+    }
+    T operator()(size_t i, size_t work_group_size)
+    {
+        typedef typename scalar_type<T>::type SCALAR;
+        size_t r = i / work_group_size;
+        return detail::make_value<T>(static_cast<SCALAR>(r));
+    }
+    bool set_local_size()
+    {
+        return true;
+    }
+    size_t get_max_local_size(const std::vector<cl_kernel>& kernels, 
+                              cl_device_id device,
+                              size_t work_group_size, // default work-group size
+                              cl_int& error)
+    {
+        // Set the size of the local memory; we need to do this to correctly
+        // calculate the max possible work-group size.
+        // Additionally, this already sets the 2nd argument of the test kernel,
+        // so we don't have to modify the execute() method.
+        error = clSetKernelArg(kernels[0], 1, sizeof(cl_uint), NULL);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg");
+        size_t wg_size;
+        error = clGetKernelWorkGroupInfo(
+            kernels[0], device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+        wg_size = wg_size <= work_group_size ? wg_size : work_group_size;        
+        return wg_size;
+    }
+    // Every work-item writes id of its work-group to output[work-item-global-id]
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) 
+            return 
+                "__kernel void " + this->get_kernel_name() + "(global " + type_name<T>() + " *output, "
+                                                              "local uint * local_mem_ptr)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + type_name<T>() + ")(get_group_id(0));\n"
+                "}\n";
+        #else
+            return         
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_synchronization>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name<T>() + "[]> output, "
+                                                              "local_ptr<uint[]> local_mem_ptr)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    size_t lid = get_local_id(0);\n"
+                "    typedef " + type_name<T>() + " TYPE;\n"
+                // 1st work-item in work-group writes get_group_id() to var
+                "    local<uint> var;\n"
+                "    local_ptr<uint> var_ptr = var.ptr();\n"
+                "    if(lid == 0) { *var_ptr = get_group_id(0); }\n"
+                "    work_group_barrier(mem_fence::local);\n"
+                // last work-item in work-group writes var to 1st element of local_mem
+                "    local_ptr<uint[]> local_mem_ptr2(local_mem_ptr);\n"
+                "    auto local_mem_ptr3 = local_mem_ptr2.release();\n"
+                "    if(lid == (get_local_size(0) - 1)) { *(local_mem_ptr3) = var; }\n"
+                "    work_group_barrier(mem_fence::local);\n"
+                // each work-item in work-group writes local_mem_ptr[0] to output[work-item-global-id]
+                "    output[gid] = local_mem_ptr[0];\n"
+                "}\n";        
+        #endif
+    }
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    // local pointer
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test<cl_uint16>());
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
+// ----------------------------
+// ---------- GLOBAL
+// ----------------------------
+template <class T>
+struct global_pointer_test : public address_spaces_test<T>
+    std::string str()
+    {
+        return "global_pointer";
+    }
+    T operator()(size_t i, size_t work_group_size)
+    {
+        typedef typename scalar_type<T>::type SCALAR;
+        (void) work_group_size;
+        return detail::make_value<T>(static_cast<SCALAR>(i));
+    }
+    // Each work-item writes its global id to output[work-item-global-id]
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) 
+            return 
+                "__kernel void " + this->get_kernel_name() + "(global " + type_name<T>() + " *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + type_name<T>() + ")(gid);\n"
+                "}\n";
+        #else
+            return         
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                "typedef " + type_name<T>() + " TYPE;\n"
+                "void set_to_gid(global_ptr<TYPE> ptr)\n"
+                "{\n"
+                "    *ptr = TYPE(get_global_id(0));"
+                "}\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<TYPE[]> output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    auto ptr = output.get();\n"
+                "    global_ptr<TYPE> ptr2(ptr);\n"
+                "    ptr2 += ptrdiff_t(gid);\n"
+                "    set_to_gid(ptr2);\n"
+                "}\n";        
+        #endif
+    }
+// Test-case body: runs global_pointer_test for cl_uint, cl_float2, cl_float4,
+// cl_float8 and cl_uint16.
+// NOTE(review): the test-registration line and the braces of this function are
+// not visible in this excerpt. RUN_ADDRESS_SPACES_TEST_MACRO is defined
+// elsewhere and presumably updates `error` / `last_error` - confirm.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    // global pointer
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test<cl_uint16>());
+    // Any non-success value is reported to the caller as -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
+// ----------------------------
+// ---------- CONSTANT
+// ----------------------------
+template <class T>
+struct constant_pointer_test : public address_spaces_test<T>
+    // m_test_value is just a random value we use in this test.
+    // Initialises m_test_value to the fixed pattern 0xdeaddead.
+    constant_pointer_test()
+        : m_test_value(0xdeaddeadU)
+    {}
+    // Returns the identifier string of this test.
+    std::string str()
+    {
+        return std::string("constant_pointer");
+    }
+    // Returns m_test_value converted to T's scalar type and expanded to a T by
+    // detail::make_value<T>. The result does not depend on i; work_group_size
+    // is ignored.
+    T operator()(size_t i, size_t work_group_size)
+    {
+        (void) work_group_size;
+        using scalar_t = typename scalar_type<T>::type;
+        return detail::make_value<T>(static_cast<scalar_t>(m_test_value));
+    }
+    // Each work-item writes m_test_value to output[work-item-global-id]
+    // Builds the device source for this test. The kernel takes the output
+    // buffer and a pointer into constant memory, and each work-item stores the
+    // first element behind that pointer in output[global id].
+    // Returns the OpenCL C variant when both DEVELOPMENT and USE_OPENCLC_KERNELS
+    // are defined, otherwise the OpenCL C++ variant.
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) 
+            return 
+                "__kernel void " + this->get_kernel_name() + "(global " + type_name<T>() + " *output, "
+                                                              "constant uint * const_ptr)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + type_name<T>() + ")(const_ptr[0]);\n"
+                "}\n";
+        #else
+            return         
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                "typedef " + type_name<T>() + " TYPE;\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<TYPE[]> output, "
+                                                              "constant_ptr<uint[]> const_ptr)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                // Copy the constant_ptr, fetch the underlying pointer with get(),
+                // then dereference it.
+                "    constant_ptr<uint[]> const_ptr2 = const_ptr;\n"
+                "    auto const_ptr3 = const_ptr2.get();\n"
+                "    output[gid] = *const_ptr3;\n"
+                "}\n";        
+        #endif
+    }
+    // This test needs its own execute(): it creates an additional buffer
+    // and binds it as the kernel's 2nd argument (constant_ptr<uint[]> const_ptr).
+    // Runs kernels[0] with output_buffer as argument 0 and a freshly created
+    // one-element buffer holding m_test_value as argument 1.
+    //
+    // kernels         - kernels built for this test; only kernels[0] is used.
+    // output_buffer   - buffer bound to kernel argument 0.
+    // queue           - command queue; the extra buffer is created in its context.
+    // work_size       - global work size (one dimension).
+    // work_group_size - local work size, passed only if set_local_size() is true.
+    //
+    // Returns CL_SUCCESS, or the error code of the first OpenCL call that failed.
+    // Once the extra buffer has been created it is released on every path,
+    // including failures of the calls that use it.
+    cl_int execute(const std::vector<cl_kernel>& kernels,
+                   cl_mem& output_buffer,
+                   cl_command_queue& queue,
+                   size_t work_size,
+                   size_t work_group_size)
+    {
+        cl_int err;
+        // Get context from queue
+        cl_context context;
+        err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL);
+        RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo");
+        // Create constant buffer
+        cl_mem const_buff = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_ONLY), sizeof(cl_uint), NULL, &err);
+        RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+        // All steps that use const_buff live in a helper, so the buffer can be
+        // released here no matter which of those steps fails.
+        err = run_with_constant_buffer(kernels[0], output_buffer, const_buff, queue, work_size, work_group_size);
+        cl_int release_err = clReleaseMemObject(const_buff);
+        if(err != CL_SUCCESS)
+        {
+            // The helper already passed this code through RETURN_ON_CL_ERROR;
+            // report the first failure rather than the release result.
+            return err;
+        }
+        RETURN_ON_CL_ERROR(release_err, "clReleaseMemObject");
+        return release_err;
+    }
+    // Helper for execute(): uploads m_test_value into const_buff, binds both
+    // kernel arguments, launches the kernel and waits for the queue to finish.
+    // Does not release const_buff; the caller owns it.
+    cl_int run_with_constant_buffer(cl_kernel kernel,
+                                    cl_mem& output_buffer,
+                                    cl_mem& const_buff,
+                                    cl_command_queue& queue,
+                                    size_t work_size,
+                                    size_t work_group_size)
+    {
+        // Write m_test_value to const_buff (blocking write)
+        cl_int err = clEnqueueWriteBuffer(
+            queue, const_buff, CL_TRUE, 0, sizeof(cl_uint),
+            static_cast<void *>(&m_test_value), 0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+        // Check each argument separately: OR-ing two OpenCL error codes would
+        // produce a value that matches neither of them.
+        err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+        err = clSetKernelArg(kernel, 1, sizeof(const_buff), &const_buff);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+        err = clEnqueueNDRangeKernel(
+            queue, kernel, 1, NULL, &work_size, this->set_local_size() ? &work_group_size : NULL, 0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+        err = clFinish(queue);
+        RETURN_ON_CL_ERROR(err, "clFinish");
+        return err;
+    }
+    cl_uint m_test_value;
+// Test-case body: runs constant_pointer_test for cl_uint, cl_float2, cl_float4,
+// cl_float8 and cl_uint16.
+// NOTE(review): the test-registration line and the braces of this function are
+// not visible in this excerpt. RUN_ADDRESS_SPACES_TEST_MACRO is defined
+// elsewhere and presumably updates `error` / `last_error` - confirm.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    // constant pointer
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test<cl_uint16>());
+    // Any non-success value is reported to the caller as -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
diff --git a/test_conformance/clcpp/address_spaces/test_storage_types.hpp b/test_conformance/clcpp/address_spaces/test_storage_types.hpp
new file mode 100644
index 0000000..e47f952
--- /dev/null
+++ b/test_conformance/clcpp/address_spaces/test_storage_types.hpp
@@ -0,0 +1,418 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include "common.hpp"
+// ----------------------------
+// ---------- PRIVATE
+// ----------------------------
+template <class T>
+struct private_storage_test : public address_spaces_test<T>
+    // Returns the identifier string of this test.
+    std::string str()
+    {
+        return std::string("private_storage");
+    }
+    // Returns index i converted to T's scalar type and expanded to a T by
+    // detail::make_value<T>. work_group_size is ignored.
+    T operator()(size_t i, size_t work_group_size)
+    {
+        (void) work_group_size;
+        using scalar_t = typename scalar_type<T>::type;
+        return detail::make_value<T>(static_cast<scalar_t>(i));
+    }
+    // Each work-item writes its global id to output[work-item-global-id]
+    // Builds the device source for this test: each work-item ends up writing
+    // its global id to output[global id].
+    // Returns the OpenCL C variant when both DEVELOPMENT and USE_OPENCLC_KERNELS
+    // are defined; otherwise the OpenCL C++ variant, which passes the value
+    // through priv<> storage objects before writing it out.
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) 
+            return 
+                "__kernel void " + this->get_kernel_name() + "(global " + type_name<T>() + " *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + type_name<T>() + ")(gid);\n"
+                "}\n";
+        #else
+            return         
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name<T>() + "[]> output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    typedef " + type_name<T>() + " TYPE;\n"
+                // priv<TYPE> holding the global id; its address is taken both
+                // with operator& and with ptr().
+                "    priv<TYPE> v = { TYPE(gid) };\n"
+                "    const TYPE *v_ptr1 = &v;\n"
+                "    private_ptr<TYPE> v_ptr2 = v.ptr();\n"
+                "    TYPE v2 = *v_ptr2;\n"
+                // The value is then routed through a one-element priv<array>.
+                "    priv<array<TYPE, 1>> a;\n"
+                "    *(a.begin()) = v2;\n"
+                "    output[gid] = a[0];\n"
+                "}\n";        
+        #endif
+    }
+// Test-case body: runs private_storage_test for cl_uint, cl_float2, cl_float4,
+// cl_float8 and cl_uint16.
+// NOTE(review): the test-registration line and the braces of this function are
+// not visible in this excerpt. RUN_ADDRESS_SPACES_TEST_MACRO is defined
+// elsewhere and presumably updates `error` / `last_error` - confirm.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    // private storage
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test<cl_uint16>());
+    // Any non-success value is reported to the caller as -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
+// ----------------------------
+// ---------- LOCAL
+// ----------------------------
+template <class T>
+struct local_storage_test : public address_spaces_test<T>
+    // Returns the identifier string of this test.
+    std::string str()
+    {
+        return std::string("local_storage");
+    }
+    // Returns i / work_group_size (integer division) converted to T's scalar
+    // type and expanded to a T by detail::make_value<T>.
+    T operator()(size_t i, size_t work_group_size)
+    {
+        using scalar_t = typename scalar_type<T>::type;
+        size_t group_index = i / work_group_size;
+        return detail::make_value<T>(static_cast<scalar_t>(group_index));
+    }
+    // Always true for this test. The execute() implementations in this suite
+    // pass the work-group size to clEnqueueNDRangeKernel only when this returns
+    // true, and NULL otherwise.
+    bool set_local_size()
+    {
+        const bool use_explicit_local_size = true;
+        return use_explicit_local_size;
+    }
+    // Every work-item writes id of its work-group to output[work-item-global-id]
+    // Builds the device source for this test: every work-item writes the id of
+    // its work-group to output[global id].
+    //
+    // Returns the OpenCL C variant when both DEVELOPMENT and USE_OPENCLC_KERNELS
+    // are defined; otherwise the OpenCL C++ variant, which passes the group id
+    // through a kernel-scope local<> variable, a local<array> and a
+    // program-scope local<> variable, with a work-group barrier after each
+    // write.
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                "__kernel void " + this->get_kernel_name() + "(global " + type_name<T>() + " *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + type_name<T>() + ")(get_group_id(0));\n"
+                "}\n";
+        #else
+            return
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_synchronization>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                // Using program scope local variable.
+                // The trailing "\n" keeps the declaration on its own line in the
+                // emitted source instead of fusing it with the kernel signature.
+                "local<" + type_name<T>() + "> program_scope_var;\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name<T>() + "[]> output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    size_t lid = get_local_id(0);\n"
+                "    typedef " + type_name<T>() + " TYPE;\n"
+                // 1st work-item in work-group writes get_group_id() to var
+                "    local<TYPE> var;\n"
+                "    if(lid == 0) { var = TYPE(get_group_id(0)); }\n"
+                "    work_group_barrier(mem_fence::local);\n"
+                // last work-item in work-group writes var to 1st element of a
+                "    local_ptr<TYPE> var_ptr = var.ptr();\n"
+                "    TYPE var2 = *var_ptr;\n"
+                "    local<array<TYPE, 1>> a;\n"
+                "    if(lid == (get_local_size(0) - 1)) { *(a.begin()) = var2; }\n"
+                "    work_group_barrier(mem_fence::local);\n"
+                // 1st work-item in work-group writes a[0] to program_scope_var
+                "    if(lid == 0) { program_scope_var = a[0]; }\n"
+                "    work_group_barrier(mem_fence::local);\n"
+                "    const TYPE *program_scope_var_ptr = &program_scope_var;\n"
+                "    output[gid] = *program_scope_var_ptr;\n"
+                "}\n";
+        #endif
+    }
+// Test-case body: runs local_storage_test for cl_uint, cl_float2, cl_float4,
+// cl_float8 and cl_int16.
+// NOTE(review): the test-registration line and the braces of this function are
+// not visible in this excerpt. RUN_ADDRESS_SPACES_TEST_MACRO is defined
+// elsewhere and presumably updates `error` / `last_error` - confirm.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    // local storage
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test<cl_int16>());
+    // Any non-success value is reported to the caller as -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
+// ----------------------------
+// ---------- GLOBAL
+// ----------------------------
+template <class T>
+struct global_storage_test : public address_spaces_test<T>
+    // m_test_value is just a random value we use in this test.
+    // m_test_value should not be zero.
+    // Initialises m_test_value to the fixed non-zero pattern 0xdeaddead.
+    global_storage_test()
+        : m_test_value(0xdeaddeadU)
+    {}
+    // Returns the identifier string of this test.
+    std::string str()
+    {
+        return std::string("global_storage");
+    }
+    // Returns m_test_value converted to T's scalar type and expanded to a T by
+    // detail::make_value<T>. The result does not depend on either argument.
+    T operator()(size_t i, size_t work_group_size)
+    {
+        typedef typename scalar_type<T>::type SCALAR;
+        // Both parameters are intentionally unused; the casts silence
+        // unused-parameter warnings, as the sibling tests do for work_group_size.
+        (void) i;
+        (void) work_group_size;
+        return detail::make_value<T>(static_cast<SCALAR>(m_test_value));
+    }
+    // Returns the names of the two kernels in this test's program: the base
+    // kernel name with "1" and with "2" appended, in that order.
+    std::vector<std::string> get_kernel_names()
+    {
+        std::vector<std::string> names;
+        names.push_back(this->get_kernel_name() + "1");
+        names.push_back(this->get_kernel_name() + "2");
+        return names;
+    }
+    // Every work-item writes m_test_value to output[work-item-global-id]
+    // Builds the device source for this test. The program contains two kernels,
+    // named by get_kernel_names()[0] and get_kernel_names()[1].
+    //
+    // OpenCL C variant (both DEVELOPMENT and USE_OPENCLC_KERNELS defined): the
+    // first kernel writes test_value to output[global id]; the second copies
+    // output[global id] onto itself.
+    // OpenCL C++ variant: the first kernel stores test_value in a program-scope
+    // global array; the second kernel uses a function-scope global flag so that
+    // a later launch copies the stored value to output[global id].
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                "__kernel void " + this->get_kernel_names()[0] + "(global " + type_name<T>() + " *output, "
+                                                                  "uint test_value)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + type_name<T>() + ")(test_value);\n"
+                "}\n"
+                "__kernel void " + this->get_kernel_names()[1] + "(global " + type_name<T>() + " *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = output[gid];\n"
+                "}\n";
+        #else
+            return
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                "typedef " + type_name<T>() + " TYPE;\n"
+                // Using program scope global variable.
+                // The trailing "\n" keeps the declaration on its own line in the
+                // emitted source instead of fusing it with the kernel signature.
+                "global<array<TYPE, 1>> program_scope_global_array;\n"
+                "__kernel void " + this->get_kernel_names()[0] + "(global_ptr<" + type_name<T>() + "[]> output, "
+                                                                  "uint test_value)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                // 1st work-item writes test_value to program_scope_global_array[0]
+                "    if(gid == 0) { program_scope_global_array[0] = test_value; }\n"
+                "}\n"
+                "__kernel void " + this->get_kernel_names()[1] + "(global_ptr<" + type_name<T>() + "[]> output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    static global<uint> func_scope_global_var { 0 };\n"
+                // if (func_scope_global_var == 1) is true then
+                // each work-item saves program_scope_global_array[0] to output[work-item-global-id]
+                "    if(func_scope_global_var == uint(1))\n"
+                "    {\n"
+                "        output[gid] = program_scope_global_array[0];\n"
+                "        return;\n"
+                "    }\n"
+                // 1st work-item writes 1 to func_scope_global_var
+                "    if(gid == 0) { func_scope_global_var = uint(1); }\n"
+                "}\n";
+        #endif
+    }
+    // In this test execution is quite complicated. We have two kernels.
+    // 1st kernel tests program scope global variable, and 2nd kernel tests 
+    // function scope global variable (that's why it is run twice).
+    // Runs both kernels of this test, waiting on the queue after every launch.
+    //
+    // kernels       - kernels[0] takes (output_buffer, m_test_value) and is run
+    //                 once; kernels[1] takes (output_buffer) and is run twice.
+    // output_buffer - buffer bound to argument 0 of both kernels.
+    // queue         - command queue used for all launches.
+    // work_size     - global work size (one dimension).
+    // wg_size       - local work size, passed only if set_local_size() is true.
+    //
+    // Returns CL_SUCCESS, or the error code of the first OpenCL call that failed.
+    cl_int execute(const std::vector<cl_kernel>& kernels,
+                   cl_mem& output_buffer,
+                   cl_command_queue& queue,
+                   size_t work_size,
+                   size_t wg_size)
+    {
+        cl_int err;
+        // Check each argument separately: OR-ing two OpenCL error codes would
+        // produce a value that matches neither of them.
+        err = clSetKernelArg(kernels[0], 0, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+        err = clSetKernelArg(kernels[0], 1, sizeof(cl_uint), &m_test_value);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+        // Run first kernel, once.
+        // In the OpenCL C++ program this kernel saves m_test_value to the program
+        // scope global variable program_scope_global_array[0].
+        err = clEnqueueNDRangeKernel(
+            queue, kernels[0], 1, NULL, &work_size, this->set_local_size() ? &wg_size : NULL, 0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+        err = clFinish(queue);
+        RETURN_ON_CL_ERROR(err, "clFinish")
+        // This result used to be overwritten by the loop below without being
+        // checked, so a failed argument setup went unnoticed.
+        err = clSetKernelArg(kernels[1], 0, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+        // Run 2nd kernel, twice (OpenCL C++ program).
+        // 1st run: the first work-item sets the function scope global variable
+        //          func_scope_global_var to 1.
+        // 2nd run: func_scope_global_var is 1, so each work-item saves
+        //          program_scope_global_array[0] to output[work-item-global-id].
+        for(size_t run = 0; run < 2; run++)
+        {
+            err = clEnqueueNDRangeKernel(
+                queue, kernels[1], 1, NULL, &work_size, this->set_local_size() ? &wg_size : NULL, 0, NULL, NULL
+            );
+            RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+            err = clFinish(queue);
+            RETURN_ON_CL_ERROR(err, "clFinish")
+        }
+        return err;
+    }
+    cl_uint m_test_value;
+// Test-case body: runs global_storage_test for cl_uint, cl_float2, cl_float4,
+// cl_float8 and cl_int16.
+// NOTE(review): the test-registration line and the braces of this function are
+// not visible in this excerpt. RUN_ADDRESS_SPACES_TEST_MACRO is defined
+// elsewhere and presumably updates `error` / `last_error` - confirm.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test<cl_int16>());
+    // Any non-success value is reported to the caller as -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
+// ----------------------------
+// ---------- CONSTANT
+// ----------------------------
+template <class T>
+struct constant_storage_test : public address_spaces_test<T>
+    // m_test_value is just a random value we use in this test.
+    // Initialises m_test_value to the fixed pattern 0xdeaddead.
+    constant_storage_test()
+        : m_test_value(0xdeaddeadU)
+    {}
+    // Returns the identifier string of this test.
+    std::string str()
+    {
+        return std::string("constant_storage");
+    }
+    // Returns m_test_value converted to T's scalar type and expanded to a T by
+    // detail::make_value<T>. The result does not depend on either argument.
+    T operator()(size_t i, size_t work_group_size)
+    {
+        typedef typename scalar_type<T>::type SCALAR;
+        // Both parameters are intentionally unused; the casts silence
+        // unused-parameter warnings, as the sibling tests do for work_group_size.
+        (void) i;
+        (void) work_group_size;
+        return detail::make_value<T>(static_cast<SCALAR>(m_test_value));
+    }
+    // Every work-item writes m_test_value to output[work-item-global-id]
+    // Builds the device source for this test: every work-item writes
+    // m_test_value to output[global id].
+    //
+    // Returns the OpenCL C variant when both DEVELOPMENT and USE_OPENCLC_KERNELS
+    // are defined; otherwise the OpenCL C++ variant, which rebuilds the value
+    // as the sum of a program-scope constant (m_test_value - 1) and a
+    // function-scope constant (1).
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                "__kernel void " + this->get_kernel_name() + "(global " + type_name<T>() + " *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = (" + type_name<T>() + ")(" + std::to_string(m_test_value) + ");\n"
+                "}\n";
+        #else
+            return
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                // Program scope constant variable, program_scope_const == (m_test_value - 1).
+                // The trailing "\n" keeps the declaration on its own line in the
+                // emitted source instead of fusing it with the kernel signature.
+                "constant<uint> program_scope_const{ (" + std::to_string(m_test_value) + " - 1) };\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name<T>() + "[]> output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    typedef " + type_name<T>() + " TYPE;\n"
+                "    static constant<uint> func_scope_const{ 1 };\n"
+                "    constant_ptr<uint> ps_const_ptr = program_scope_const.ptr();\n"
+                // "    constant_ptr<array<uint, 1>> fs_const_ptr = &func_scope_const;\n"
+                "    output[gid] = TYPE(*ps_const_ptr + func_scope_const);\n"
+                "}\n";
+        #endif
+    }
+    cl_uint m_test_value;
+// Test-case body: runs constant_storage_test for cl_uint, cl_float2, cl_float4,
+// cl_float8 and cl_int16.
+// NOTE(review): the test-registration line and the braces of this function are
+// not visible in this excerpt. RUN_ADDRESS_SPACES_TEST_MACRO is defined
+// elsewhere and presumably updates `error` / `last_error` - confirm.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test<cl_uint>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test<cl_float2>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test<cl_float4>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test<cl_float8>());
+    RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test<cl_int16>());
+    // Any non-success value is reported to the caller as -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
diff --git a/test_conformance/clcpp/api/CMakeLists.txt b/test_conformance/clcpp/api/CMakeLists.txt
new file mode 100644
index 0000000..30763d6
--- /dev/null
+++ b/test_conformance/clcpp/api/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/api/main.cpp b/test_conformance/clcpp/api/main.cpp
new file mode 100644
index 0000000..89f8f1b
--- /dev/null
+++ b/test_conformance/clcpp/api/main.cpp
@@ -0,0 +1,27 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "test_spec_consts.hpp"
+#include "test_ctors_dtors.hpp"
+#include "test_ctors.hpp"
+#include "test_dtors.hpp"
+// Program entry point: passes the test definitions registered in the global
+// autotest suite to runTestHarness and returns its result.
+// NOTE(review): the function braces and the 4th runTestHarness argument (the
+// slot between the two adjacent commas) are missing in this excerpt; restore
+// them from the original file before building.
+int main(int argc, const char *argv[])
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(argc, argv, tests.size(),, false, false, 0);
diff --git a/test_conformance/clcpp/api/test_ctors.hpp b/test_conformance/clcpp/api/test_ctors.hpp
new file mode 100644
index 0000000..8cdfc6e
--- /dev/null
+++ b/test_conformance/clcpp/api/test_ctors.hpp
@@ -0,0 +1,481 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <limits>
+#include <algorithm>
+#include <numeric>
+#include "../common.hpp"
+// TEST 1
+// Verify that constructors are executed before any kernel is executed.
+// Verify that when present, multiple constructors are executed. The order between
+// constructors is undefined, but they should all execute.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C stand-in for test_ctors_executed, selected when both DEVELOPMENT and
+// USE_OPENCLC_KERNELS are defined: it writes 0 to every output element and
+// exercises no constructor.
+// NOTE(review): the terminating ';' of this definition is not visible in this
+// excerpt.
+const char * kernel_test_ctors_executed =
+    "__kernel void test_ctors_executed(global uint *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+// OpenCL C stand-in for test_ctors_executed_multiple_ctors: it writes 0 to
+// every output element and exercises no constructor.
+// NOTE(review): the terminating ';' and the preprocessor line that separates
+// this stand-in from the OpenCL C++ sources are not visible in this excerpt.
+const char * kernel_test_ctors_executed_multiple_ctors =
+    "__kernel void test_ctors_executed_multiple_ctors(global uint *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+// OpenCL C++ source for test_ctors_executed. A program-scope object is
+// constructed with 0xbeefbeef by a non-trivial constructor; each work-item
+// writes 0 to output[gid] if the object holds that value and 1 otherwise.
+// NOTE(review): the terminating ';' of this definition is not visible in this
+// excerpt.
+const char * kernel_test_ctors_executed =
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "using namespace cl;\n"
+    "struct ctor_test_class {\n"
+    // non-trivial ctor
+    "   ctor_test_class(int y) { x = y;};\n"
+    "   int x;\n"
+    "};\n"
+    // global scope program variable
+    "ctor_test_class global_var(int(0xbeefbeef));\n"
+    "__kernel void test_ctors_executed(global_ptr<uint[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   int result = 0;\n"
+    "   if(global_var.x != int(0xbeefbeef)) result = 1;\n"
+    "   output[gid] = result;\n"
+    "}\n"
+// OpenCL C++ source for test_ctors_executed_multiple_ctors. Three program-scope
+// objects (int, uint, float) are constructed by a non-trivial constructor; each
+// work-item writes 0 to output[gid] only if all three hold the value they were
+// constructed with, and 1 otherwise.
+// NOTE(review): the terminating ';' of this definition is not visible in this
+// excerpt.
+const char * kernel_test_ctors_executed_multiple_ctors =
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "#include <opencl_limits>\n"
+    "using namespace cl;\n"
+    "template<class T>\n"
+    "struct ctor_test_class {\n"
+    // non-trivial ctor
+    "   ctor_test_class(T y) { x = y;};\n"
+    "   T x;\n"
+    "};\n"
+    // global scope program variables
+    "ctor_test_class<int> global_var0(int(0xbeefbeef));\n"
+    "ctor_test_class<uint> global_var1(uint(0xbeefbeefU));\n"
+    "ctor_test_class<float> global_var2(float(FLT_MAX));\n"
+    "__kernel void test_ctors_executed_multiple_ctors(global_ptr<uint[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   int result = 0;\n"
+    "   if(global_var0.x != int(0xbeefbeef))   result = 1;\n"
+    "   if(global_var1.x != uint(0xbeefbeefU)) result = 1;\n"
+    "   if(global_var2.x != float(FLT_MAX))    result = 1;\n"
+    "   output[gid] = result;\n"
+    "}\n"
+// Builds kernel_source, runs the kernel named kernel_name over `count`
+// work-items and checks the output buffer.
+// The buffer is pre-filled with 1 on the host; after the kernel has run, the
+// sum of all elements must be 0, otherwise error is set to -1 and reported
+// through CHECK_ERROR_MSG.
+// Returns the resulting error value.
+// NOTE(review): this excerpt is missing the function braces, parts of the
+// #if / #elif / #endif chain that selects one of the create_opencl_kernel
+// calls, and the arguments of the two static_cast<void *>( expressions;
+// restore them from the original file before editing.
+// NOTE(review): the RETURN_ON_* macros presumably return early, in which case
+// output_buffer, kernel and program are not released on error paths - confirm.
+int test_ctors_execution(cl_device_id device,
+                         cl_context context,
+                         cl_command_queue queue,
+                         int count,
+                         std::string kernel_name,
+                         const char * kernel_source)
+    int error = CL_SUCCESS;
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+    size_t dim = 1;
+    size_t work_size[1];
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(context, &program, &kernel, kernel_source, kernel_name);
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(context, &program, &kernel, kernel_source, kernel_name, "", false);
+    RETURN_ON_ERROR(error)
+// Normal run
+    error = create_opencl_kernel(context, &program, &kernel, kernel_source, kernel_name);
+    RETURN_ON_ERROR(error)
+    // host vector, size == count, output[0...count-1] == 1
+    std::vector<cl_uint> output(count, cl_uint(1));
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    // Upload the all-ones pattern (blocking write).
+    error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_uint) * output.size(), static_cast<void *>(, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    // One work-item per output element; the local size is left to the runtime.
+    work_size[0] = output.size();
+    error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+    // Blocking read of the results back into the host vector.
+    error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_uint) * output.size(), static_cast<void *>(, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+    // Every element must be 0, so any non-zero sum means at least one failure.
+    size_t sum = std::accumulate(output.begin(), output.end(), size_t(0));
+    if(sum != 0)
+    {
+        error = -1;
+        CHECK_ERROR_MSG(error, "Test %s failed.", kernel_name.c_str());
+    }
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+// Test-case body: runs test_ctors_execution for the single-constructor kernel
+// and for the multiple-constructor kernel, OR-ing both results into `error`.
+// NOTE(review): the test-registration line and the braces of this function are
+// not visible in this excerpt.
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+    local_error = test_ctors_execution(
+        device, context, queue, count,
+        "test_ctors_executed", kernel_test_ctors_executed
+    );
+    CHECK_ERROR(local_error);
+    error |= local_error;
+    local_error = test_ctors_execution(
+        device, context, queue, count,
+        "test_ctors_executed_multiple_ctors", kernel_test_ctors_executed_multiple_ctors
+    );
+    CHECK_ERROR(local_error);
+    error |= local_error;
+    // Any non-success value is reported to the caller as -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }
+    return error;
+// TEST 2
+// Verify that constructors are only executed once when multiple kernels from a program are executed.
+// How: The first kernel (test_ctors_executed_once_set) is run once. It changes values of program scope
+// variables, then the second kernel is run multiple times, each time verifying that global variables
+// have correct values (the second kernel should observe the values assigned by the first kernel, not
+// by the constructors).
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C stand-in for TEST 2, selected when both DEVELOPMENT and
+// USE_OPENCLC_KERNELS are defined: the "set" kernel is empty and the "read"
+// kernel writes 0 to every output element.
+// NOTE(review): the terminating ';' and the preprocessor line that separates
+// this stand-in from the OpenCL C++ source are not visible in this excerpt.
+const char * program_test_ctors_executed_once =
+    "__kernel void test_ctors_executed_once_set()\n"
+    "{\n"
+    "}\n"
+    "__kernel void test_ctors_executed_once_read(global uint *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+// OpenCL C++ source for TEST 2. Two program-scope objects are constructed with
+// 0. The "set" kernel has work-item 0 overwrite them with 0xbeefbeef; the
+// "read" kernel writes 0 to output[gid] if both objects still hold 0xbeefbeef
+// and 1 otherwise, which would be the case if the constructors had run again.
+// NOTE(review): the terminating ';' of this definition is not visible in this
+// excerpt.
+const char * program_test_ctors_executed_once =
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "using namespace cl;\n"
+    // struct template
+    "template<class T>\n"
+    "struct ctor_test_class {\n"
+    // non-trivial ctor
+    "   ctor_test_class(T y) { x = y;};\n"
+    "   T x;\n"
+    "};\n"
+    // global scope program variables
+    "ctor_test_class<int> global_var0(int(0));\n"
+    "ctor_test_class<uint> global_var1(uint(0));\n"
+    "__kernel void test_ctors_executed_once_set()\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   if(gid == 0) {\n"
+    "       global_var0.x = int(0xbeefbeef);\n"
+    "       global_var1.x = uint(0xbeefbeefU);\n"
+    "   }\n"
+    "}\n\n"
+    "__kernel void test_ctors_executed_once_read(global_ptr<uint[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   int result = 0;\n"
+    "   if(global_var0.x != int(0xbeefbeef))   result = 1;\n"
+    "   if(global_var1.x != uint(0xbeefbeefU)) result = 1;\n"
+    "   output[gid] = result;\n"
+    "}\n"
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+    // Host side of TEST 2: runs the "set" kernel once, then runs the "read"
+    // kernel four times. Every work-item must report 0 on every run, i.e. the
+    // values stored by the first kernel must still be observed.
+    // NOTE(review): the test-case name, the enclosing braces and the opening
+    // preprocessor guard of the section below are not visible here - confirm
+    // against the full file.
+    int error = CL_SUCCESS;
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel_set_global_vars;
+    cl_kernel kernel_read_global_vars;
+    size_t dim = 1;
+    size_t work_size[1];
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(
+        context, &program, &kernel_set_global_vars,
+        program_test_ctors_executed_once, "test_ctors_executed_once_set"
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel_set_global_vars,
+        program_test_ctors_executed_once, "test_ctors_executed_once_set", "", false
+    );
+    RETURN_ON_ERROR(error)
+    // Get the second kernel
+    kernel_read_global_vars = clCreateKernel(program, "test_ctors_executed_once_read", &error);
+    RETURN_ON_CL_ERROR(error, "clCreateKernel");
+// Normal run
+    error = create_opencl_kernel(
+        context, &program, &kernel_set_global_vars,
+        program_test_ctors_executed_once, "test_ctors_executed_once_set"
+    );
+    RETURN_ON_ERROR(error)
+    // Get the second kernel
+    kernel_read_global_vars = clCreateKernel(program, "test_ctors_executed_once_read", &error);
+    RETURN_ON_CL_ERROR(error, "clCreateKernel");
+    // Execute kernel_set_global_vars
+    work_size[0] = count;
+    error = clEnqueueNDRangeKernel(queue, kernel_set_global_vars, dim, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+    // Execute kernel_read_global_vars 4 times, each time we check if
+    // global variables have correct values.
+    // host vector, size == count, output[0...count-1] == 1
+    std::vector<cl_uint> output(count, cl_uint(1));
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    for(size_t i = 0; i < 4; i++)
+    {
+        // Reset host and device copies to 1 so that a kernel which writes
+        // nothing cannot pass the sum check below.
+        std::fill(output.begin(), output.end(), cl_uint(1));
+        error = clEnqueueWriteBuffer(
+            queue, output_buffer, CL_TRUE,
+            0, sizeof(cl_uint) * output.size(),
+            static_cast<void *>(output.data()),
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+        error = clSetKernelArg(kernel_read_global_vars, 0, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        work_size[0] = output.size();
+        error = clEnqueueNDRangeKernel(
+            queue, kernel_read_global_vars,
+            dim, NULL, work_size, NULL,
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+        error = clEnqueueReadBuffer(
+            queue, output_buffer, CL_TRUE,
+            0, sizeof(cl_uint) * output.size(),
+            static_cast<void *>(output.data()),
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+        // Every work-item writes 0 on success, so any non-zero sum is a failure.
+        size_t sum = std::accumulate(output.begin(), output.end(), size_t(0));
+        if(sum != 0)
+        {
+            error = -1;
+            CHECK_ERROR_MSG(error, "Test test_ctors_executed_once failed.");
+            // Stop here: the next iteration would overwrite `error` with the
+            // status of its first API call and hide this failure.
+            break;
+        }
+    }
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(kernel_set_global_vars);
+    clReleaseKernel(kernel_read_global_vars);
+    clReleaseProgram(program);
+    return error;
+// TEST 3
+// Verify that when constructor is executed, the ND-range used is (1,1,1).
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C stand-in used to exercise only the C++ host code: the kernel
+// writes 0 for every work-item.
+const char * program_test_ctors_ndrange =
+    "__kernel void test_ctors_ndrange(global int *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+;
+#else
+// OpenCL C++ program whose program-scope objects record the global size seen
+// by their constructor. The kernel writes 0 when both objects recorded
+// (1, 1, 1) and 1 otherwise.
+const char * program_test_ctors_ndrange =
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "using namespace cl;\n"
+    // struct
+    "struct ctor_test_class {\n"
+    // non-trivial ctor: captures the global size in all three dimensions
+    "   ctor_test_class() {\n"
+    "       x = get_global_size(0);\n"
+    "       y = get_global_size(1);\n"
+    "       z = get_global_size(2);\n"
+    "   };\n"
+    "   ulong x;\n"
+    "   ulong y;\n"
+    "   ulong z;\n"
+    // return true if the ND-range used when ctor was executed was
+    // (1, 1, 1); otherwise - false
+    "   bool check() { return (x == 1) && (y == 1) && (z == 1);}\n"
+    "};\n"
+    // global scope program variables
+    "ctor_test_class global_var0;\n"
+    "ctor_test_class global_var1;\n"
+    "__kernel void test_ctors_ndrange(global_ptr<uint[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   int result = 0;\n"
+    "   if(!global_var0.check()) result = 1;\n"
+    "   if(!global_var1.check()) result = 1;\n"
+    "   output[gid] = result;\n"
+    "}\n"
+;
+#endif
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+    // Host side of TEST 3: runs test_ctors_ndrange over `count` work-items and
+    // requires every work-item to report 0.
+    // NOTE(review): the test-case name, the enclosing braces and the opening
+    // preprocessor guard of the section below are not visible here - confirm
+    // against the full file.
+    int error = CL_SUCCESS;
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+    size_t dim = 1;
+    size_t work_size[1];
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_ctors_ndrange, "test_ctors_ndrange"
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_ctors_ndrange, "test_ctors_ndrange", "", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_ctors_ndrange, "test_ctors_ndrange"
+    );
+    RETURN_ON_ERROR(error)
+    // host vector, size == count, output[0...count-1] == 1
+    std::vector<cl_uint> output(count, cl_uint(1));
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    // Seed the device buffer with 1 so that a kernel which writes nothing
+    // cannot pass the sum check below.
+    error = clEnqueueWriteBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    work_size[0] = output.size();
+    error = clEnqueueNDRangeKernel(
+        queue, kernel,
+        dim, NULL, work_size, NULL,
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+    error = clEnqueueReadBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+    // Every work-item writes 0 on success, so any non-zero sum is a failure.
+    size_t sum = std::accumulate(output.begin(), output.end(), size_t(0));
+    if(sum != 0)
+    {
+        error = -1;
+        CHECK_ERROR_MSG(error, "Test test_ctors_executed_ndrange failed.");
+    }
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
diff --git a/test_conformance/clcpp/api/test_ctors_dtors.hpp b/test_conformance/clcpp/api/test_ctors_dtors.hpp
new file mode 100644
index 0000000..02838fa
--- /dev/null
+++ b/test_conformance/clcpp/api/test_ctors_dtors.hpp
@@ -0,0 +1,185 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <limits>
+#include <algorithm>
+#include "../common.hpp"
+// Verify that clGetProgramInfo queries correctly report the presence of constructors and/or
+// destructors (both are present, only one of them is present, neither is present).
+std::string generate_ctor_dtor_program(const bool ctor, const bool dtor) // returns kernel source text; ctor/dtor select which program-scope objects it declares
+    std::string program; // source text, appended to below
+    if(ctor)
+    {
+        program +=
+            "struct ctor_test_class {\n"
+            // non-trivial ctor: stores its argument in x
+            "   ctor_test_class(int y) { x = y;};\n"
+            "   int x;\n"
+            "};\n"
+            "ctor_test_class ctor = ctor_test_class(1024);\n" // program-scope object built through that ctor
+        ;
+    }
+    if(dtor)
+    {
+        program +=
+            "struct dtor_test_class {\n"
+            // non-trivial dtor: overwrites x with -1024
+            "   ~dtor_test_class() { x = -1024; };\n"
+            "   int x;\n"
+            "};\n"
+            "dtor_test_class dtor;\n" // program-scope object with that dtor
+        ;
+    }
+    program += "__kernel void test_ctor_dtor()\n {\n }\n"; // empty kernel, appended regardless of ctor/dtor
+    return program;
+int test_get_program_info_global_ctors_dtors_present(cl_device_id device,
+                                                     cl_context context,
+                                                     cl_command_queue queue,
+                                                     const bool ctor,
+                                                     const bool dtor)
+    int error = CL_SUCCESS;
+    cl_program program;
+    // program source and options
+    std::string options = "";
+    std::string source = generate_ctor_dtor_program(ctor, dtor);
+    const char * source_ptr = source.c_str();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    // Create program
+    error = create_openclcpp_program(context, &program, 1, &source_ptr, options.c_str());
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    return CL_SUCCESS;
+// Normal run
+    // Create program
+    error = create_openclcpp_program(context, &program, 1, &source_ptr, options.c_str());
+    RETURN_ON_ERROR(error)
+    // This indicates that the program object contains non-trivial constructor(s) that will be
+    // executed by runtime before any kernel from the program is executed.
+    // This indicates that the program object contains non-trivial destructor(s) that will be
+    // executed by runtime when program is destroyed.
+    cl_bool ctors_present;
+    size_t cl_bool_size;
+    error = clGetProgramInfo(
+        program,
+        sizeof(cl_bool),
+        static_cast<void*>(&ctors_present),
+        &cl_bool_size
+    );
+    RETURN_ON_CL_ERROR(error, "clGetProgramInfo")
+    if(cl_bool_size != sizeof(cl_bool))
+    {
+        error = -1;
+        CHECK_ERROR_MSG(-1, "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).", cl_bool_size, sizeof(cl_bool));
+    }
+    if(ctor && ctors_present != CL_TRUE)
+    {
+        error = -1;
+        CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 0, should be: 1.");
+    }
+    else if(!ctor && ctors_present == CL_TRUE)
+    {
+        error = -1;
+        CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 1, should be: 0.");
+    }
+    cl_bool dtors_present = 0;
+    error = clGetProgramInfo(
+        program,
+        sizeof(cl_bool),
+        static_cast<void*>(&ctors_present),
+        &cl_bool_size
+    );
+    RETURN_ON_CL_ERROR(error, "clGetProgramInfo")
+    if(cl_bool_size != sizeof(cl_bool))
+    {
+        error = -1;
+        CHECK_ERROR_MSG(-1, "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).", cl_bool_size, sizeof(cl_bool));
+    }
+    if(dtor && dtors_present != CL_TRUE)
+    {
+        error = -1;
+        CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 0, should be: 1.");
+    }
+    else if(!dtor && dtors_present == CL_TRUE)
+    {
+        error = -1;
+        CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 1, should be: 0.");
+    }
+    clReleaseProgram(program);
+    return error;
+(cl_device_id device, cl_context context, cl_command_queue queue, int count) // count is not used in the visible body
+    int error = CL_SUCCESS; // OR-ed status of all four runs
+    int last_error = CL_SUCCESS; // status of the most recent run
+    // both present: program declares a global ctor and a global dtor
+    last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, true, true);
+    CHECK_ERROR(last_error);
+    error |= last_error;
+    // dtor only
+    last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, false, true);
+    CHECK_ERROR(last_error);
+    error |= last_error;
+    // ctor only
+    last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, true, false);
+    CHECK_ERROR(last_error);
+    error |= last_error;
+    // none present
+    last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, false, false);
+    CHECK_ERROR(last_error);
+    error |= last_error;
+    if(error != CL_SUCCESS) // at least one run failed: collapse the OR-ed codes to a plain -1
+    {
+        return -1;
+    }
+    return error;
diff --git a/test_conformance/clcpp/api/test_dtors.hpp b/test_conformance/clcpp/api/test_dtors.hpp
new file mode 100644
index 0000000..2f4fd0c
--- /dev/null
+++ b/test_conformance/clcpp/api/test_dtors.hpp
@@ -0,0 +1,553 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <limits>
+#include <algorithm>
+#include <numeric>
+#include "../common.hpp"
+// TEST 1
+// Verify that destructor is executed.
+// How: destructor of struct dtor_test_class has a side effect: zeroing buffer. If values
+// in buffer are not zeros after releasing program, destructor was not executed.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C stand-in used to exercise only the C++ host code: the kernel
+// itself zeroes the output buffer.
+const char * program_test_dtor_is_executed =
+    "__kernel void test_dtor_is_executed(global uint *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+;
+#else
+// OpenCL C++ program with one program-scope object whose destructor zeroes
+// the buffer that the kernel registered in it.
+const char * program_test_dtor_is_executed =
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "using namespace cl;\n"
+    // struct
+    "struct dtor_test_class {\n"
+    // non-trivial dtor
+    // set all values in buffer to 0
+    "   ~dtor_test_class() {\n"
+    "       for(ulong i = 0; i < size; i++)\n"
+    "           buffer[i] = 0;\n"
+    "   };\n"
+    "   global_ptr<uint[]> buffer;\n"
+    "   ulong size;\n"
+    "};\n"
+    // global scope program variable
+    "dtor_test_class global_var;\n"
+    // values in output __MUST BE__ greater than 0 for the test to work
+    // correctly
+    "__kernel void test_dtor_is_executed(global_ptr<uint[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    // set buffer and size in global var (work-item 0 only)
+    "   if(gid == 0){\n"
+    "       global_var.buffer = output;\n"
+    "       global_var.size = get_global_size(0);\n"
+    "   }\n"
+    "}\n"
+;
+#endif
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+    // Host side of TEST 1: fills a buffer with 0xbeefbeef, runs the kernel so
+    // the program-scope object learns about the buffer, releases the kernel
+    // and the program, and then requires the buffer to read back as zeros.
+    // NOTE(review): the test-case name, the enclosing braces and the opening
+    // preprocessor guard of the section below are not visible here - confirm
+    // against the full file.
+    int error = CL_SUCCESS;
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+    size_t dim = 1;
+    size_t work_size[1];
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtor_is_executed, "test_dtor_is_executed"
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtor_is_executed, "test_dtor_is_executed", "", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtor_is_executed, "test_dtor_is_executed"
+    );
+    RETURN_ON_ERROR(error)
+    // host vector, size == count, output[0...count-1] == 0xbeefbeef (3203383023)
+    // values in output __MUST BE__ greater than 0 for the test to work correctly
+    std::vector<cl_uint> output(count, cl_uint(0xbeefbeef));
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    error = clEnqueueWriteBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    work_size[0] = output.size();
+    error = clEnqueueNDRangeKernel(
+        queue, kernel,
+        dim, NULL, work_size, NULL,
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+    // Release kernel and program
+    // Dtor should be called now
+    error = clReleaseKernel(kernel);
+    RETURN_ON_CL_ERROR(error, "clReleaseKernel")
+    error = clReleaseProgram(program);
+    RETURN_ON_CL_ERROR(error, "clReleaseProgram")
+    // Finish
+    error = clFinish(queue);
+    RETURN_ON_CL_ERROR(error, "clFinish")
+    // Read output buffer
+    error = clEnqueueReadBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+    // The destructor zeroes the whole buffer, so any non-zero sum is a failure.
+    size_t sum = std::accumulate(output.begin(), output.end(), size_t(0));
+    if(sum != 0)
+    {
+        error = -1;
+        CHECK_ERROR_MSG(error, "Test test_dtor_is_executed failed.");
+    }
+    clReleaseMemObject(output_buffer);
+    return error;
+// TEST 2
+// Verify that multiple destructors, if present, are executed. Order between multiple
+// destructors is undefined.
+// Verify that each destructor is executed only once.
+// How:
+// 0) dtor_test_class struct has a global pointer to a buffer, it's set by
+// test_dtors_executed_once kernel.
+// 1) Destructors have a side effect: each dtor writes to its part of the buffer. If all
+// dtors are executed, all values in that buffer should be changed.
+// 2) The first time destructors are executed, they set their parts of the buffer to zero.
+// Next time to 1, next time to 2 etc. Since dtors should be executed only once, all
+// values in that buffer should be equal to zero.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C stand-in used to exercise only the C++ host code: the kernel
+// itself zeroes the output buffer.
+const char * program_test_dtors_executed_once =
+    "__kernel void test_dtors_executed_once(global uint *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+;
+#else
+// OpenCL C++ program with four program-scope objects. The kernel gives each
+// object its own slice [start; end - 1] of the output buffer; each destructor
+// writes its run counter into that slice and then increments the counter.
+const char * program_test_dtors_executed_once =
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "using namespace cl;\n"
+    // struct
+    "struct dtor_test_class {\n"
+    // non-trivial dtor
+    // Set all values in range [start; end - 1] in buffer to counter.
+    // If dtor is executed only once (correct), all values in range
+    // [start; end - 1] in buffer should be equal to zero after releasing
+    // the program
+    "   ~dtor_test_class() {\n"
+    "       for(ulong i = start; i < end; i++){\n"
+    "           buffer[i] = counter;\n"
+    "       };\n"
+    "       counter++;\n"
+    "   };\n"
+    "   global_ptr<uint[]> buffer;\n"
+    "   ulong start;\n"
+    "   ulong end;\n"
+    "   ulong counter;\n"
+    "};\n"
+    // global scope program variables
+    "dtor_test_class global_var0;\n"
+    "dtor_test_class global_var1;\n"
+    "dtor_test_class global_var2;\n"
+    "dtor_test_class global_var3;\n"
+    // values in output __MUST BE__ greater than 0 for the test to work correctly
+    "__kernel void test_dtors_executed_once(global_ptr<uint[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    // set buffer and size in global var (work-item 0 only)
+    "   if(gid == 0){\n"
+    "       ulong end = get_global_size(0) / 4;\n"
+    // global_var0
+    "       global_var0.buffer = output;\n"
+    "       global_var0.start = 0;\n"
+    "       global_var0.end = end;\n"
+    "       global_var0.counter = 0;\n"
+    // global_var1
+    "       global_var1.buffer = output;\n"
+    "       global_var1.start = end;\n"
+    "       end += get_global_size(0) / 4;\n"
+    "       global_var1.end = end;\n"
+    "       global_var1.counter = 0;\n"
+    // global_var2
+    "       global_var2.buffer = output;\n"
+    "       global_var2.start = end;\n"
+    "       end += get_global_size(0) / 4;\n"
+    "       global_var2.end = end;\n"
+    "       global_var2.counter = 0;\n"
+    // global_var3 takes the remainder, up to the global size
+    "       global_var3.buffer = output;\n"
+    "       global_var3.start = end;\n"
+    "       global_var3.end = get_global_size(0);\n"
+    "       global_var3.counter = 0;\n"
+    "   }\n"
+    "}\n"
+;
+#endif
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+    // Host side of TEST 2. Retains the program twice and releases it twice,
+    // checking that the buffer is still untouched, then releases the kernel
+    // and the program and requires the whole buffer to read back as zeros.
+    // NOTE(review): the test-case name, the enclosing braces and the opening
+    // preprocessor guard of the section below are not visible here - confirm
+    // against the full file.
+    int error = CL_SUCCESS;
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+    size_t dim = 1;
+    size_t work_size[1];
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtors_executed_once, "test_dtors_executed_once"
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtors_executed_once, "test_dtors_executed_once", "", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtors_executed_once, "test_dtors_executed_once"
+    );
+    RETURN_ON_ERROR(error)
+    // host vector, size == count, output[0...count-1] == 0xbeefbeef (3203383023)
+    // values in output __MUST BE__ greater than 0 for the test to work correctly
+    cl_uint init_value = cl_uint(0xbeefbeef);
+    std::vector<cl_uint> output(count, init_value);
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    error = clEnqueueWriteBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    work_size[0] = output.size();
+    error = clEnqueueNDRangeKernel(
+        queue, kernel,
+        dim, NULL, work_size, NULL,
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+    // Increments the program reference count. Twice
+    error = clRetainProgram(program);
+    RETURN_ON_CL_ERROR(error, "clRetainProgram")
+    error = clRetainProgram(program);
+    RETURN_ON_CL_ERROR(error, "clRetainProgram")
+    // Should just decrement the program reference count.
+    error = clReleaseProgram(program);
+    RETURN_ON_CL_ERROR(error, "clReleaseProgram")
+    error = clFinish(queue);
+    RETURN_ON_CL_ERROR(error, "clFinish")
+    // Should just decrement the program reference count.
+    error = clReleaseProgram(program);
+    RETURN_ON_CL_ERROR(error, "clReleaseProgram")
+    error = clFinish(queue);
+    RETURN_ON_CL_ERROR(error, "clFinish")
+    // At this point global scope variables should not be destroyed,
+    // values in output buffer should not be modified.
+    // Read output buffer
+    error = clEnqueueReadBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+    // Any element that no longer holds init_value was written by a dtor.
+    for(auto& i : output)
+    {
+        if(i != init_value)
+        {
+            log_error("ERROR: Test test_global_scope_dtors_executed_once failed.");
+            log_error("\tDestructors were executed prematurely.\n");
+            RETURN_ON_ERROR(-1)
+        }
+    }
+    // Release kernel and program, destructors should be called now
+    error = clReleaseKernel(kernel);
+    RETURN_ON_CL_ERROR(error, "clReleaseKernel")
+    error = clReleaseProgram(program);
+    RETURN_ON_CL_ERROR(error, "clReleaseProgram")
+    // Finish
+    error = clFinish(queue);
+    RETURN_ON_CL_ERROR(error, "clFinish")
+    // Read output buffer
+    error = clEnqueueReadBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+    // Each dtor writes its counter (0 on the first run) into its slice, so a
+    // non-zero sum means a dtor ran more than once or did not run at all.
+    size_t sum = std::accumulate(output.begin(), output.end(), size_t(0));
+    if(sum != 0)
+    {
+        log_error("ERROR: Test test_global_scope_dtors_executed_once failed.");
+        // Maybe some dtors were not run?
+        for(auto& i : output)
+        {
+            if(i == init_value)
+            {
+                log_error("\tSome dtors were not executed.");
+                break;
+            }
+        }
+        log_error("\n");
+        RETURN_ON_ERROR(-1)
+    }
+    // Clean
+    clReleaseMemObject(output_buffer);
+    return error;
+// TEST 3
+// Verify that ND-range during destructor execution is set to (1,1,1)
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C stand-in used to exercise only the C++ host code: the kernel
+// itself zeroes the output buffer.
+const char * program_test_dtor_ndrange =
+    "__kernel void test_dtor_ndrange(global uint *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+;
+#else
+// OpenCL C++ program with one program-scope object whose destructor zeroes
+// the registered buffer only when the global size it observes is (1, 1, 1).
+const char * program_test_dtor_ndrange =
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "using namespace cl;\n"
+    // struct
+    "struct dtor_test_class {\n"
+    // non-trivial dtor
+    // set all values in buffer to 0 only if ND-range is (1, 1, 1)
+    "   ~dtor_test_class() {\n"
+    "       if(check()){\n"
+    "           for(ulong i = 0; i < size; i++)\n"
+    "               buffer[i] = 0;\n"
+    "       }\n"
+    "   };\n"
+    // return true if the ND-range is (1, 1, 1); otherwise - false
+    "   bool check() {\n"
+    "       return (get_global_size(0) == 1)"
+              " && (get_global_size(1) == 1)"
+              " && (get_global_size(2) == 1);\n"
+    "   }\n"
+    "   ulong size;\n"
+    "   global_ptr<uint[]> buffer;\n"
+    "};\n"
+    // global scope program variable
+    "dtor_test_class global_var;\n"
+    // values in output __MUST BE__ greater than 0 for the test to work correctly
+    "__kernel void test_dtor_ndrange(global_ptr<uint[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    // set buffer and size in global var (work-item 0 only)
+    "   if(gid == 0){\n"
+    "       global_var.buffer = output;\n"
+    "       global_var.size = get_global_size(0);\n"
+    "   }\n"
+    "}\n"
+;
+#endif
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+    // Host side of TEST 3: fills a buffer with 0xbeefbeef, runs the kernel so
+    // the program-scope object learns about the buffer, releases the kernel
+    // and the program, and then requires the buffer to read back as zeros
+    // (the destructor only zeroes it when it observes a (1, 1, 1) ND-range).
+    // NOTE(review): the test-case name, the enclosing braces and the opening
+    // preprocessor guard of the section below are not visible here - confirm
+    // against the full file.
+    int error = CL_SUCCESS;
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+    size_t dim = 1;
+    size_t work_size[1];
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtor_ndrange, "test_dtor_ndrange"
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtor_ndrange, "test_dtor_ndrange", "", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        program_test_dtor_ndrange, "test_dtor_ndrange"
+    );
+    RETURN_ON_ERROR(error)
+    // host vector, size == count, output[0...count-1] == 0xbeefbeef (3203383023)
+    // values in output __MUST BE__ greater than 0 for the test to work correctly
+    std::vector<cl_uint> output(count, cl_uint(0xbeefbeef));
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    error = clEnqueueWriteBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    work_size[0] = output.size();
+    error = clEnqueueNDRangeKernel(
+        queue, kernel,
+        dim, NULL, work_size, NULL,
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+    // Release kernel and program
+    // Dtor should be called now
+    error = clReleaseKernel(kernel);
+    RETURN_ON_CL_ERROR(error, "clReleaseKernel")
+    error = clReleaseProgram(program);
+    RETURN_ON_CL_ERROR(error, "clReleaseProgram")
+    // Finish
+    error = clFinish(queue);
+    RETURN_ON_CL_ERROR(error, "clFinish")
+    // Read output buffer
+    error = clEnqueueReadBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+    // A non-zero sum means the destructor did not zero the buffer.
+    size_t sum = std::accumulate(output.begin(), output.end(), size_t(0));
+    if(sum != 0)
+    {
+        error = -1;
+        CHECK_ERROR_MSG(error, "Test test_dtor_ndrange failed.");
+    }
+    clReleaseMemObject(output_buffer);
+    return error;
diff --git a/test_conformance/clcpp/api/test_spec_consts.hpp b/test_conformance/clcpp/api/test_spec_consts.hpp
new file mode 100644
index 0000000..1d06168
--- /dev/null
+++ b/test_conformance/clcpp/api/test_spec_consts.hpp
@@ -0,0 +1,474 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <limits>
+#include <algorithm>
+#include "../common.hpp"
+// TEST 1
+// Verify that if left unset the specialization constant defaults to the default value set in SPIR-V (zero).
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C stand-in kernel: unconditionally stores 0 in output[gid].
+const char * kernel_test_spec_consts_defaults =
+    "__kernel void test_spec_consts_defaults(global int *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+// OpenCL C++ kernel: declares specialization constants with ids 1-9 (plus id 10
+// for double when cl_khr_fp64 is defined and id 11 for half when cl_khr_fp16 is
+// defined), each with a default value of zero. The kernel stores 1 in
+// output[gid] if any get(specN) differs from zero, and 0 otherwise.
+const char * kernel_test_spec_consts_defaults =
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "#include <opencl_spec_constant>\n"
+    "using namespace cl;\n"
+    "spec_constant<char,  1> spec1(0);\n"
+    "spec_constant<uchar, 2> spec2(0);\n"
+    "spec_constant<short, 3> spec3(0);\n"
+    "spec_constant<ushort,4> spec4(0);\n"
+    "spec_constant<int,   5> spec5(0);\n"
+    "spec_constant<uint,  6> spec6(0);\n"
+    "spec_constant<long,  7> spec7(0);\n"
+    "spec_constant<ulong, 8> spec8(0);\n"
+    "spec_constant<float, 9> spec9(0.0f);\n"
+    "#ifdef cl_khr_fp64\n"
+    "spec_constant<double, 10> spec10(0.0);\n"
+    "#endif\n"
+    "#ifdef cl_khr_fp16\n"
+    "spec_constant<half, 11> spec11(0.0h);\n"
+    "#endif\n"
+    "__kernel void test_spec_consts_defaults(global_ptr<int[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   int result = 0;\n"
+    "   if(get(spec1) != char(0))   result = 1;\n"
+    "   if(get(spec2) != uchar(0))  result = 1;\n"
+    "   if(get(spec3) != short(0))  result = 1;\n"
+    "   if(get(spec4) != ushort(0)) result = 1;\n"
+    "   if(get(spec5) != int(0))    result = 1;\n"
+    "   if(get(spec6) != uint(0))   result = 1;\n"
+    "   if(get(spec7) != long(0))   result = 1;\n"
+    "   if(get(spec8) != ulong(0))  result = 1;\n"
+    "   if(get(spec9) != float(0))  result = 1;\n"
+    "#ifdef cl_khr_fp64\n"
+    "   if(get(spec10) != double(0)) result = 1;\n"
+    "#endif\n"
+    "#ifdef cl_khr_fp16\n"
+    "   if(get(spec11) != half(0)) result = 1;\n"
+    "#endif\n"
+    "   output[gid] = result;\n"
+    "}\n"
+// Host driver for TEST 1. No specialization constant is set before the program
+// is built, so the kernel is expected to write 0 to output[0]. The host vector
+// is pre-filled with 1 so that a kernel which never writes is also detected.
+// Returns CL_SUCCESS on success; failing steps return through the RETURN_ON_* macros.
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+    int error = CL_SUCCESS;
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+    size_t dim = 1;
+    size_t work_size[1];
+    // Enable half/double support in the compiler only when the device reports it.
+    std::string options = "";
+    if(is_extension_available(device, "cl_khr_fp16"))
+    {
+        options += " -cl-fp16-enable";
+    }
+    if(is_extension_available(device, "cl_khr_fp64"))
+    {
+        options += " -cl-fp64-enable";
+    }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_defaults, "test_spec_consts_defaults", options);
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_defaults, "test_spec_consts_defaults", "", false);
+    RETURN_ON_ERROR(error)
+// Normal run
+    // Spec constants are NOT set before clBuildProgram (called in create_opencl_kernel), so
+    // they all should default to the default value set in SPIR-V (zero).
+    error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_defaults, "test_spec_consts_defaults", options);
+    RETURN_ON_ERROR(error)
+    // host vector, size == 1, output[0] == 1
+    std::vector<cl_int> output(1, cl_int(1));
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    // Blocking write of the host vector into the device buffer.
+    error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    work_size[0] = output.size();
+    error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+    // Blocking read of the kernel result back into the host vector.
+    error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+    // if output[0] != 0, then some spec constant(s) did not default to zero.
+    if(output[0] != 0)
+    {
+        // Release the OpenCL objects before reporting, so the failure path does
+        // not leak them. NOTE(review): RETURN_ON_ERROR_MSG is expected to return
+        // from this function -- confirm in common.hpp.
+        clReleaseMemObject(output_buffer);
+        clReleaseKernel(kernel);
+        clReleaseProgram(program);
+        RETURN_ON_ERROR_MSG(-1, "Test test_spec_consts_defaults failed, output[0]: %d.", output[0])
+    }
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+// TEST 2
+// Verify that setting an existing specialization constant affects only
+// the value of that constant and not of other specialization constants.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C stand-in kernel: unconditionally stores 0 in output[gid].
+const char * kernel_test_spec_consts_many_constants =
+    "__kernel void test_spec_consts_many_constants(global int *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+// OpenCL C++ kernel: three int specialization constants (ids 1-3, default 0).
+// The result is a bit mask: +1 if spec1 != -1024, +2 if spec2 != 0,
+// +4 if spec3 != 1024; 0 means every constant had the expected value.
+// The entry point must be named test_spec_consts_many_constants, because that
+// is the kernel name the host code asks for when it creates the kernel.
+const char * kernel_test_spec_consts_many_constants =
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "#include <opencl_spec_constant>\n"
+    "using namespace cl;\n"
+    "spec_constant<int, 1> spec1(0);\n"
+    "spec_constant<int, 2> spec2(0);\n"
+    "spec_constant<int, 3> spec3(0);\n"
+    "__kernel void test_spec_consts_many_constants(global_ptr<int[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   int result = 0;\n"
+    "   if(get(spec1) != int(-1024)) result += 1;\n"
+    "   if(get(spec2) != int(0))     result += 2;\n"
+    "   if(get(spec3) != int(1024))  result += 4;\n"
+    "   output[gid] = result;\n"
+    "}\n"
+// Host driver for TEST 2. Sets specialization constants 1 (-1024) and 3 (1024),
+// leaves constant 2 untouched, builds the program and expects the kernel to
+// write 0 to output[0]. The host vector is pre-filled with 1.
+// Returns CL_SUCCESS on success; failing steps return through the RETURN_ON_* macros.
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+    int error = CL_SUCCESS;
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+    size_t dim = 1;
+    size_t work_size[1];
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants"
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants", "", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+    // Create program
+    error = create_openclcpp_program(context, &program, 1, &kernel_test_spec_consts_many_constants);
+    RETURN_ON_ERROR(error)
+    // Set specialization constants
+    // clSetProgramSpecializationConstant(
+    //     cl_program /* program */, cl_uint /* spec_id */, size_t  /* spec_size */,const void* /* spec_value */
+    // )
+    cl_int spec1 = -1024;
+    cl_int spec3 = 1024;
+    // Set spec1
+    error = clSetProgramSpecializationConstant(program, cl_uint(1), sizeof(cl_int), static_cast<void*>(&spec1));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Specialization constant spec2 should default to zero
+    // Set spec3
+    error = clSetProgramSpecializationConstant(program, cl_uint(3), sizeof(cl_int), static_cast<void*>(&spec3));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Build program and create kernel
+    error = build_program_create_kernel_helper(
+        context, &program, &kernel, 1, &kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants"
+    );
+    RETURN_ON_ERROR(error)
+    // host vector, size == 1, output[0] == 1
+    std::vector<cl_int> output(1, cl_int(1));
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    // Blocking write of the host vector into the device buffer.
+    error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    work_size[0] = output.size();
+    error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+    // Blocking read of the kernel result back into the host vector.
+    error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+    // if output[0] != 0, then values of spec constants were incorrect
+    if(output[0] != 0)
+    {
+        // Release the OpenCL objects before reporting, so the failure path does
+        // not leak them. NOTE(review): RETURN_ON_ERROR_MSG is expected to return
+        // from this function -- confirm in common.hpp.
+        clReleaseMemObject(output_buffer);
+        clReleaseKernel(kernel);
+        clReleaseProgram(program);
+        RETURN_ON_ERROR_MSG(-1, "Test test_spec_consts_many_constants failed, output[0]: %d.", output[0]);
+    }
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+// TEST 3
+// Verify that the API correctly handles the size of a specialization constant by exercising
+// the API for specialization constants of different types (int, bool, float, etc.)
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C stand-in kernel: unconditionally stores 0 in output[gid].
+const char * kernel_test_spec_consts_different_types =
+    "__kernel void test_spec_consts_different_types(global int *output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   output[gid] = 0;\n"
+    "}\n"
+// OpenCL C++ kernel: specialization constants ids 1-9 of different scalar types
+// (plus id 10 for double under cl_khr_fp64 and id 11 for half under
+// cl_khr_fp16), all defaulting to zero. Each constant is compared with the
+// maximum value of its type; a mismatch adds a distinct power of two to the
+// result (1 for spec1 up to 1024 for spec11), so output[gid] == 0 means every
+// constant held its type's maximum.
+const char * kernel_test_spec_consts_different_types =
+    "#include <opencl_memory>\n"
+    "#include <opencl_work_item>\n"
+    "#include <opencl_spec_constant>\n"
+    "#include <opencl_limits>\n"
+    "using namespace cl;\n"
+    "spec_constant<char,  1> spec1(0);\n"
+    "spec_constant<uchar, 2> spec2(0);\n"
+    "spec_constant<short, 3> spec3(0);\n"
+    "spec_constant<ushort,4> spec4(0);\n"
+    "spec_constant<int,   5> spec5(0);\n"
+    "spec_constant<uint,  6> spec6(0);\n"
+    "spec_constant<long,  7> spec7(0);\n"
+    "spec_constant<ulong, 8> spec8(0);\n"
+    "spec_constant<float, 9> spec9(0.0f);\n"
+    "#ifdef cl_khr_fp64\n"
+    "spec_constant<double, 10> spec10(0.0);\n"
+    "#endif\n"
+    "#ifdef cl_khr_fp16\n"
+    "spec_constant<half, 11> spec11(0.0h);\n"
+    "#endif\n"
+    "__kernel void test_spec_consts_different_types(global_ptr<int[]> output)\n"
+    "{\n"
+    "   ulong gid = get_global_id(0);\n"
+    "   int result = 0;\n"
+    "   if(get(spec1) != char(CHAR_MAX))    result += 1;\n"
+    "   if(get(spec2) != uchar(UCHAR_MAX))  result += 2;\n"
+    "   if(get(spec3) != short(SHRT_MAX))   result += 4;\n"
+    "   if(get(spec4) != ushort(USHRT_MAX)) result += 8;\n"
+    "   if(get(spec5) != int(INT_MAX))      result += 16;\n"
+    "   if(get(spec6) != uint(UINT_MAX))    result += 32;\n"
+    "   if(get(spec7) != long(LONG_MAX))    result += 64;\n"
+    "   if(get(spec8) != ulong(ULONG_MAX))  result += 128;\n"
+    "   if(get(spec9) != float(FLT_MAX))    result += 256;\n"
+    "#ifdef cl_khr_fp64\n"
+    "   if(get(spec10) != double(DBL_MAX)) result += 512;\n"
+    "#endif\n"
+    "#ifdef cl_khr_fp16\n"
+    "   if(get(spec11) != half(HALF_MAX)) result += 1024;\n"
+    "#endif\n"
+    "   output[gid] = result;\n"
+    "}\n"
+// Host driver for TEST 3. Sets every specialization constant of the
+// different_types program to the maximum value of its type (ids 10 and 11 only
+// when cl_khr_fp64 / cl_khr_fp16 are available), builds the program and expects
+// the kernel to write 0 to output[0]. The host vector is pre-filled with 1.
+// Returns CL_SUCCESS on success; failing steps return through the RETURN_ON_* macros.
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+    int error = CL_SUCCESS;
+    cl_mem output_buffer;
+    cl_program program;
+    cl_kernel kernel;
+    size_t dim = 1;
+    size_t work_size[1];
+    // Enable half/double support in the compiler only when the device reports it.
+    std::string options = "";
+    if(is_extension_available(device, "cl_khr_fp16"))
+    {
+        options += " -cl-fp16-enable";
+    }
+    if(is_extension_available(device, "cl_khr_fp64"))
+    {
+        options += " -cl-fp64-enable";
+    }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_different_types, "test_spec_consts_different_types", options);
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_different_types, "test_spec_consts_different_types", "", false);
+    RETURN_ON_ERROR(error)
+// Normal run
+    // Create program
+    error = create_openclcpp_program(context, &program, 1, &kernel_test_spec_consts_different_types, options.c_str());
+    RETURN_ON_ERROR(error)
+    // Set specialization constants
+    // spec_id walks ids 1..9 via post-increment; ids 10 and 11 are passed explicitly below.
+    cl_uint spec_id = 1;
+    cl_char   spec1 = CL_CHAR_MAX;
+    cl_uchar  spec2 = CL_UCHAR_MAX;
+    cl_short  spec3 = CL_SHRT_MAX;
+    cl_ushort spec4 = CL_USHRT_MAX;
+    cl_int    spec5 = CL_INT_MAX;
+    cl_uint   spec6 = CL_UINT_MAX;
+    cl_long   spec7 = CL_LONG_MAX;
+    cl_ulong  spec8 = CL_ULONG_MAX;
+    cl_float  spec9 = CL_FLT_MAX;
+    cl_double spec10 = CL_DBL_MAX;
+    cl_half   spec11 = CL_HALF_MAX;
+    // Set spec1
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_char), static_cast<void*>(&spec1));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec2
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_uchar), static_cast<void*>(&spec2));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec3
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_short), static_cast<void*>(&spec3));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec4
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_ushort), static_cast<void*>(&spec4));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec5
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_int), static_cast<void*>(&spec5));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec6
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_uint), static_cast<void*>(&spec6));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec7
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_long), static_cast<void*>(&spec7));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec8
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_ulong), static_cast<void*>(&spec8));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec9
+    error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_float), static_cast<void*>(&spec9));
+    RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    // Set spec10
+    if(is_extension_available(device, "cl_khr_fp64"))
+    {
+        error = clSetProgramSpecializationConstant(program, cl_uint(10), sizeof(cl_double), static_cast<void*>(&spec10));
+        RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    }
+    // Set spec11
+    if(is_extension_available(device, "cl_khr_fp16"))
+    {
+        error = clSetProgramSpecializationConstant(program, cl_uint(11), sizeof(cl_half), static_cast<void*>(&spec11));
+        RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    }
+    // Build program and create kernel.
+    // This must use this test's own source and kernel name (different_types),
+    // not the ones belonging to the many_constants test.
+    error = build_program_create_kernel_helper(
+        context, &program, &kernel, 1, &kernel_test_spec_consts_different_types, "test_spec_consts_different_types"
+    );
+    RETURN_ON_ERROR(error)
+    // Copy output to output_buffer, run kernel, copy output_buffer back to output, check result
+    // host vector, size == 1, output[0] == 1
+    std::vector<cl_int> output(1, cl_int(1));
+    output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    work_size[0] = output.size();
+    error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+    error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast<void *>(output.data()), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+    // if output[0] != 0, then some spec constants had incorrect values
+    if(output[0] != 0)
+    {
+        // Release the OpenCL objects before reporting, so the failure path does
+        // not leak them. NOTE(review): RETURN_ON_ERROR_MSG is expected to return
+        // from this function -- confirm in common.hpp.
+        clReleaseMemObject(output_buffer);
+        clReleaseKernel(kernel);
+        clReleaseProgram(program);
+        RETURN_ON_ERROR_MSG(-1, "Test test_spec_consts_different_types failed, output[0]: %d.", output[0])
+    }
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
diff --git a/test_conformance/clcpp/atomics/CMakeLists.txt b/test_conformance/clcpp/atomics/CMakeLists.txt
new file mode 100644
index 0000000..4fb4bfd
--- /dev/null
+++ b/test_conformance/clcpp/atomics/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/atomics/atomic_fetch.hpp b/test_conformance/clcpp/atomics/atomic_fetch.hpp
new file mode 100644
index 0000000..5618375
--- /dev/null
+++ b/test_conformance/clcpp/atomics/atomic_fetch.hpp
@@ -0,0 +1,306 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+const size_t atomic_bucket_size = 100;
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C variant. Builds the source of a kernel named "test_" + func.str()
+// taking (global T *input, global atomic_T *output), where T is
+// type_name<type>(). Each work-item calls
+//   atomic_<func.str()>(&output[gid / atomic_bucket_size], input[gid])
+// so atomic_bucket_size consecutive work-items update the same output element.
+// func.defs() is prepended to the source.
+template <class func_type, class type>
+std::string generate_kernel_atomic_fetch(func_type func)
+    std::string in1_value = "input[gid]";
+    std::string out1_value = "output[gid / " + std::to_string(atomic_bucket_size) + "]";
+    std::string function_call = "atomic_" + func.str() + "(&" + out1_value + ", " + in1_value + ")";
+    return
+        "" + func.defs() +
+        "__kernel void test_" + func.str() + "(global " + type_name<type>() + " *input, global atomic_" + type_name<type>() + " *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    " + function_call + ";\n"
+        "}\n";
+// OpenCL C++ variant. Same kernel name and bucket mapping, but the arguments
+// are global_ptr<T[]> input and global_ptr<atomic<T>[]> output, and the
+// operation is invoked as a member call:
+//   output[gid / atomic_bucket_size].<func.str()>(input[gid])
+// func.defs() and func.headers() are prepended to the source.
+template <class func_type, class type>
+std::string generate_kernel_atomic_fetch(func_type func)
+    std::string in1_value = "input[gid]";
+    std::string out1_value = "output[gid / " + std::to_string(atomic_bucket_size) + "]";
+    std::string function_call = func.str() + "(" + in1_value + ")";
+    return
+        "" + func.defs() +
+        "" + func.headers() +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_" + func.str() + "(global_ptr<" + type_name<type>() +  "[]> input,"
+                                              "global_ptr<atomic<" + type_name<type>() + ">[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    " + out1_value + "." + function_call + ";\n"
+        "}\n";
+// Host-side check of the device results: every element of `out` must equal the
+// fold of op over its bucket of inputs (at most atomic_bucket_size consecutive
+// elements of `in`), starting from op.init_out(). Reports the first mismatch
+// through print_error_msg and returns false; returns true when all buckets match.
+template<class TYPE, class atomic_fetch>
+bool verify_atomic_fetch(const std::vector<TYPE> &in, const std::vector<TYPE> &out, atomic_fetch op)
+    const size_t input_count = in.size();
+    for (size_t bucket = 0; bucket < out.size(); ++bucket)
+    {
+        // Inputs [first, first + atomic_bucket_size) feed this bucket; the last
+        // bucket may be shorter when the input does not fill it completely.
+        const size_t first = bucket * atomic_bucket_size;
+        TYPE reference = op.init_out();
+        for (size_t idx = first; idx < first + atomic_bucket_size && idx < input_count; ++idx)
+        {
+            reference = op(reference, in[idx]);
+        }
+        if (reference != out[bucket])
+        {
+            print_error_msg(reference, out[bucket], bucket, op);
+            return false;
+        }
+    }
+    return true;
+// Runs one atomic fetch operation `op` on the device and checks the result
+// against a host-side recomputation (verify_atomic_fetch).
+// `count` input elements are generated in [op.min1(), op.max1()]; every
+// atomic_bucket_size consecutive work-items accumulate into one output element,
+// which is pre-filled with op.init_out().
+// Returns CL_SUCCESS when the test passes or is skipped (unsupported element
+// type, or an 8-byte type without both 64-bit atomics extensions); failing
+// steps return through the RETURN_ON_* macros.
+template <class atomic_fetch>
+int test_atomic_fetch_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, atomic_fetch op)
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int err;
+    typedef typename atomic_fetch::in_type TYPE;
+    // Don't run test for unsupported types
+    if (!(type_supported<TYPE>(device)))
+    {
+        return CL_SUCCESS;
+    }
+    // 8-byte types need both 64-bit atomics extensions.
+    if (sizeof(TYPE) == 8 &&
+        (!is_extension_available(device, "cl_khr_int64_base_atomics") ||
+         !is_extension_available(device, "cl_khr_int64_extended_atomics")))
+    {
+        return CL_SUCCESS;
+    }
+    std::string code_str = generate_kernel_atomic_fetch<atomic_fetch, TYPE>(op);
+    std::string kernel_name("test_"); kernel_name += op.str();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    // Stop here if the kernel could not be created; `program` and `kernel`
+    // must not be used after a failed build.
+    RETURN_ON_ERROR(err)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(err)
+    std::vector<TYPE> input = generate_input<TYPE>(count, op.min1(), op.max1(), std::vector<TYPE>());
+    // One output element per bucket, rounded up.
+    // NOTE(review): count is unsigned, so count - 1 wraps when count == 0 -- callers are assumed to pass count > 0.
+    std::vector<TYPE> output = generate_output<TYPE>((count - 1) / atomic_bucket_size + 1);
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(TYPE) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(TYPE) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(TYPE) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer")
+    // Every output element starts from the operation's initial value.
+    const TYPE pattern = op.init_out();
+    err = clEnqueueFillBuffer(queue, buffers[1], &pattern, sizeof(pattern), 0, sizeof(TYPE) * output.size(), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueFillBuffer")
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg")
+    err = clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg")
+    work_size[0] = count;
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel")
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(TYPE) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer")
+    if (!verify_atomic_fetch(input, output, op))
+    {
+        // Release the OpenCL objects before reporting, so the failure path does
+        // not leak them. NOTE(review): RETURN_ON_ERROR_MSG is expected to return
+        // from this function -- confirm in common.hpp.
+        clReleaseMemObject(buffers[0]);
+        clReleaseMemObject(buffers[1]);
+        clReleaseKernel(kernel);
+        clReleaseProgram(program);
+        RETURN_ON_ERROR_MSG(-1, "test_%s %s failed", op.str().c_str(), type_name<TYPE>().c_str());
+    }
+    log_info("test_%s %s passed\n", op.str().c_str(), type_name<TYPE>().c_str());
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+// Common base of the atomic fetch operation descriptors: exposes the element
+// type, the kernel-source preamble pieces and the input value range.
+template<class TYPE>
+struct atomic_fetch
+    typedef TYPE in_type;
+    // Returns type_name<TYPE>().
+    std::string decl_str()
+    {
+        return type_name<TYPE>();
+    }
+    // Extension pragmas for the kernel source; non-empty only for 8-byte types.
+    std::string defs()
+    {
+        if (sizeof(TYPE) != 8)
+        {
+            return std::string();
+        }
+        return std::string("#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n")
+            + "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n";
+    }
+    // Include line required by the OpenCL C++ kernel source.
+    std::string headers()
+    {
+        return std::string("#include <opencl_atomic>\n");
+    }
+    // Lower bound passed to the input generator.
+    TYPE min1()
+    {
+        return static_cast<TYPE>(0);
+    }
+    // Upper bound passed to the input generator.
+    TYPE max1()
+    {
+        return static_cast<TYPE>(1000);
+    }
+/* Macro body: each expansion defines a functor derived from atomic_fetch<TYPE>; str() yields the stringized FUNC_NAME, init_out() the initial output value INIT_OUT, and operator() the host-side reference computation HOST_FUNC_EXPRESSION on (x, y). */ \
+template<class TYPE> \
+struct CLASS_NAME : public atomic_fetch<TYPE> \
+{ \
+    std::string str() \
+    { \
+        return #FUNC_NAME; \
+    } \
+    \
+    TYPE init_out() \
+    { \
+        return INIT_OUT; \
+    } \
+    \
+    TYPE operator()(const TYPE& x, const TYPE& y) \
+    { \
+        return HOST_FUNC_EXPRESSION; \
+    } \
+DEF_ATOMIC_FETCH_FUNC(atomic_fetch_add, fetch_add, x + y, 0) // arguments as used here: class name, name returned by str(), host expression, initial output value
+DEF_ATOMIC_FETCH_FUNC(atomic_fetch_sub, fetch_sub, x - y, (std::numeric_limits<TYPE>::max)()) // starts from the largest TYPE value
+DEF_ATOMIC_FETCH_FUNC(atomic_fetch_and, fetch_and, x & y, (std::numeric_limits<TYPE>::max)()) // starts from the largest TYPE value
+DEF_ATOMIC_FETCH_FUNC(atomic_fetch_or,  fetch_or,  x | y, 0)
+DEF_ATOMIC_FETCH_FUNC(atomic_fetch_xor, fetch_xor, x ^ y, 0)
+DEF_ATOMIC_FETCH_FUNC(atomic_fetch_max, fetch_max, (std::max)(x, y), 0)
+DEF_ATOMIC_FETCH_FUNC(atomic_fetch_min, fetch_min, (std::min)(x, y), (std::numeric_limits<TYPE>::max)()) // starts from the largest TYPE value
+// Test case body: runs test_atomic_fetch_func for every fetch operation
+// (add, sub, and, or, xor, max, min) on cl_int, cl_uint, cl_long and cl_ulong.
+// Every run is executed even after a failure; the function returns -1 if any
+// run reported a non-CL_SUCCESS result and CL_SUCCESS otherwise.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    last_error = test_atomic_fetch_func( \
+        device, context, queue, n_elems, TEST_CLASS \
+    ); \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+    // One invocation per (operation, element type) pair; results are OR-ed into `error`.
+    TEST_ATOMIC_MACRO((atomic_fetch_add<cl_int>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_add<cl_uint>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_add<cl_long>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_add<cl_ulong>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_sub<cl_int>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_sub<cl_uint>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_sub<cl_long>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_sub<cl_ulong>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_and<cl_int>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_and<cl_uint>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_and<cl_long>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_and<cl_ulong>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_or<cl_int>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_or<cl_uint>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_or<cl_long>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_or<cl_ulong>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_xor<cl_int>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_xor<cl_uint>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_xor<cl_long>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_xor<cl_ulong>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_max<cl_int>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_max<cl_uint>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_max<cl_long>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_max<cl_ulong>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_min<cl_int>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_min<cl_uint>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_min<cl_long>()))
+    TEST_ATOMIC_MACRO((atomic_fetch_min<cl_ulong>()))
+    // The OR of several error codes is not itself a meaningful code, so any
+    // failure is reported as -1.
+    if (error != CL_SUCCESS)
+    {
+        return -1;
+    }
+    return error;
diff --git a/test_conformance/clcpp/atomics/main.cpp b/test_conformance/clcpp/atomics/main.cpp
new file mode 100644
index 0000000..7103998
--- /dev/null
+++ b/test_conformance/clcpp/atomics/main.cpp
@@ -0,0 +1,25 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "atomic_fetch.hpp"
+// Entry point of the atomics test executable: passes the tests collected in
+// the global autotest suite (count and pointer to the first definition) to the
+// conformance test harness and returns its result.
+int main(int argc, const char *argv[])
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(argc, argv, tests.size(), tests.data(), false, false, 0);
diff --git a/test_conformance/clcpp/attributes/CMakeLists.txt b/test_conformance/clcpp/attributes/CMakeLists.txt
new file mode 100644
index 0000000..1b1c15a
--- /dev/null
+++ b/test_conformance/clcpp/attributes/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/attributes/main.cpp b/test_conformance/clcpp/attributes/main.cpp
new file mode 100644
index 0000000..765867e
--- /dev/null
+++ b/test_conformance/clcpp/attributes/main.cpp
@@ -0,0 +1,27 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "test_ivdep.hpp"
+#include "test_max_size.hpp"
+#include "test_required_num_sub_groups.hpp"
+// Entry point of the attributes test executable: passes the tests collected in
+// the global autotest suite (count and pointer to the first definition) to the
+// conformance test harness and returns its result.
+int main(int argc, const char *argv[])
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(argc, argv, tests.size(), tests.data(), false, false, 0);
diff --git a/test_conformance/clcpp/attributes/test_ivdep.hpp b/test_conformance/clcpp/attributes/test_ivdep.hpp
new file mode 100644
index 0000000..17b1f58
--- /dev/null
+++ b/test_conformance/clcpp/attributes/test_ivdep.hpp
@@ -0,0 +1,418 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <vector>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+namespace test_ivdep {
+enum class loop_kind
+    for_loop,
+    while_loop,
+    do_loop
+struct test_options
+    loop_kind loop;
+    int ivdep_length;
+    int offset1;
+    int offset2;
+    int iter_count;
+    bool offset1_param;
+    bool offset2_param;
+    bool iter_count_param;
+    bool cond_in_header;
+    bool init_in_header;
+    bool incr_in_header;
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+std::string generate_source(test_options options)
+    std::string offset1s = options.offset1_param ? "offset1" : std::to_string(options.offset1);
+    std::string offset2s = options.offset2_param ? "offset2" : std::to_string(options.offset2);
+    std::string init = "i = 0";
+    std::string cond = std::string("i < ") + (options.iter_count_param ? "iter_count" : std::to_string(options.iter_count));
+    std::string incr = "i += 2";
+    std::stringstream s;
+    s << R"(
+    kernel void test(global int *a, global int *b, global int *c, int offset1, int offset2, int iter_count)
+    {
+        int i;
+    )";
+    // Loop #1
+    if (!options.init_in_header) s << init << ";" << std::endl;
+    if (options.loop == loop_kind::for_loop)
+        s << "for (" <<
+            (options.init_in_header ? init : "") << ";" <<
+            (options.cond_in_header ? cond : "") << ";" <<
+            (options.incr_in_header ? incr : "") << ")";
+    else if (options.loop == loop_kind::while_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ")";
+    else if (options.loop == loop_kind::do_loop)
+        s << "do";
+    s << "{" << std::endl;
+    if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl;
+    s << "a[i + " << offset1s << "] = b[i + " << offset1s << "] * c[i + " << offset1s << "];" << std::endl;
+    if (!options.incr_in_header) s << incr << ";" << std::endl;
+    s << "}" << std::endl;
+    if (options.loop == loop_kind::do_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl;
+    // Loop #2
+    if (!options.init_in_header) s << init << ";" << std::endl;
+    if (options.loop == loop_kind::for_loop)
+        s << "for (" <<
+            (options.init_in_header ? init : "") << ";" <<
+            (options.cond_in_header ? cond : "") << ";" <<
+            (options.incr_in_header ? incr : "") << ")";
+    else if (options.loop == loop_kind::while_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ")";
+    else if (options.loop == loop_kind::do_loop)
+        s << "do";
+    s << "{" << std::endl;
+    if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl;
+    s << "a[i + " << offset2s << "] = a[i] + b[i];" << std::endl;
+    if (!options.incr_in_header) s << incr << ";" << std::endl;
+    s << "}" << std::endl;
+    if (options.loop == loop_kind::do_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl;
+    s << "}" << std::endl;
+    return s.str();
+std::string generate_source(test_options options)
+    std::string offset1s = options.offset1_param ? "offset1" : std::to_string(options.offset1);
+    std::string offset2s = options.offset2_param ? "offset2" : std::to_string(options.offset2);
+    std::string init = "i = 0";
+    std::string cond = std::string("i < ") + (options.iter_count_param ? "iter_count" : std::to_string(options.iter_count));
+    std::string incr = "i += 2";
+    std::stringstream s;
+    s << R"(
+    #include <opencl_memory>
+    #include <opencl_work_item>
+    using namespace cl;
+    )";
+    s << R"(
+    kernel void test(global_ptr<int[]> a, global_ptr<int[]> b, global_ptr<int[]> c, int offset1, int offset2, int iter_count)
+    {
+        int i;
+    )";
+    // Loop #1
+    if (!options.init_in_header) s << init << ";" << std::endl;
+    if (options.ivdep_length > 0) s << "[[cl::ivdep]]" << std::endl;
+    if (options.loop == loop_kind::for_loop)
+        s << "for (" <<
+            (options.init_in_header ? init : "") << ";" <<
+            (options.cond_in_header ? cond : "") << ";" <<
+            (options.incr_in_header ? incr : "") << ")";
+    else if (options.loop == loop_kind::while_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ")";
+    else if (options.loop == loop_kind::do_loop)
+        s << "do";
+    s << "{" << std::endl;
+    if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl;
+    s << "a[i + " << offset1s << "] = b[i + " << offset1s << "] * c[i + " << offset1s << "];" << std::endl;
+    if (!options.incr_in_header) s << incr << ";" << std::endl;
+    s << "}" << std::endl;
+    if (options.loop == loop_kind::do_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl;
+    // Loop #2
+    if (!options.init_in_header) s << init << ";" << std::endl;
+    if (options.ivdep_length > 0) s << "[[cl::ivdep(" << options.ivdep_length << ")]]" << std::endl;
+    if (options.loop == loop_kind::for_loop)
+        s << "for (" <<
+            (options.init_in_header ? init : "") << ";" <<
+            (options.cond_in_header ? cond : "") << ";" <<
+            (options.incr_in_header ? incr : "") << ")";
+    else if (options.loop == loop_kind::while_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ")";
+    else if (options.loop == loop_kind::do_loop)
+        s << "do";
+    s << "{" << std::endl;
+    if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl;
+    s << "a[i + " << offset2s << "] = a[i] + b[i];" << std::endl;
+    if (!options.incr_in_header) s << incr << ";" << std::endl;
+    s << "}" << std::endl;
+    if (options.loop == loop_kind::do_loop)
+        s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl;
+    s << "}" << std::endl;
+    return s.str();
+int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
+    int error = CL_SUCCESS;
+    cl_program program;
+    cl_kernel kernel;
+    std::string kernel_name = "test";
+    std::string source = generate_source(options);
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name, "", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    const size_t count = 100;
+    const size_t global_size = 1;
+    std::vector<int> a(count);
+    std::vector<int> b(count);
+    std::vector<int> c(count);
+    for (size_t i = 0; i < count; i++)
+    {
+        a[i] = 0;
+        b[i] = i;
+        c[i] = 1;
+    }
+    cl_mem a_buffer;
+    a_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+        sizeof(int) * count, static_cast<void *>(, &error
+    );
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    cl_mem b_buffer;
+    b_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+        sizeof(int) * count, static_cast<void *>(, &error
+    );
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    cl_mem c_buffer;
+    c_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+        sizeof(int) * count, static_cast<void *>(,&error
+    );
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &a_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 1, sizeof(cl_mem), &b_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &c_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 3, sizeof(cl_int), &options.offset1);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 4, sizeof(cl_int), &options.offset2);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 5, sizeof(cl_int), &options.iter_count);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+    std::vector<int> a_output(count);
+    error = clEnqueueReadBuffer(
+        queue, a_buffer, CL_TRUE,
+        0, sizeof(int) * count,
+        static_cast<void *>(,
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+    for (int i = 0; i < options.iter_count; i += 2)
+    {
+        a[i + options.offset1] = b[i + options.offset1] * c[i + options.offset1];
+    }
+    for (int i = 0; i < options.iter_count; i += 2)
+    {
+        a[i + options.offset2] = a[i] + b[i];
+    }
+    for (size_t i = 0; i < count; i++)
+    {
+        const int value = a_output[i];
+        const int expected = a[i];
+        if (value != expected)
+        {
+            RETURN_ON_ERROR_MSG(-1,
+                "Test failed. Element %lu: %d should be: %d",
+                i, value, expected
+            );
+        }
+    }
+    clReleaseMemObject(a_buffer);
+    clReleaseMemObject(b_buffer);
+    clReleaseMemObject(c_buffer);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+const std::vector<std::tuple<int, int, int>> params{
+    std::make_tuple<int, int, int>( -1, 0, 0 ),
+    std::make_tuple<int, int, int>( -1, 3, 4 ),
+    std::make_tuple<int, int, int>( 1, 1, 1 ),
+    std::make_tuple<int, int, int>( 3, 4, 2 ),
+    std::make_tuple<int, int, int>( 3, 4, 3 ),
+    std::make_tuple<int, int, int>( 8, 10, 7 ),
+    std::make_tuple<int, int, int>( 16, 16, 16 )
+const std::vector<int> iter_counts{ { 1, 4, 12, 40 } };
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    int error = CL_SUCCESS;
+    for (auto param : params)
+    for (auto iter_count : iter_counts)
+    for (bool offset1_param : { false, true })
+    for (bool offset2_param : { false, true })
+    for (bool iter_count_param : { false, true })
+    for (bool cond_in_header : { false, true })
+    for (bool init_in_header : { false, true })
+    for (bool incr_in_header : { false, true })
+    {
+        test_options options;
+        options.loop = loop_kind::for_loop;
+        options.ivdep_length = std::get<0>(param);
+        options.offset1 = std::get<1>(param);
+        options.offset2 = std::get<2>(param);
+        options.iter_count = iter_count;
+        options.offset1_param = offset1_param;
+        options.offset2_param = offset2_param;
+        options.iter_count_param = iter_count_param;
+        options.cond_in_header = cond_in_header;
+        options.init_in_header = init_in_header;
+        options.incr_in_header = incr_in_header;
+        error = test(device, context, queue, options);
+        RETURN_ON_ERROR(error)
+    }
+    return error;
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    int error = CL_SUCCESS;
+    for (auto param : params)
+    for (auto iter_count : iter_counts)
+    for (bool offset1_param : { false, true })
+    for (bool offset2_param : { false, true })
+    for (bool iter_count_param : { false, true })
+    for (bool cond_in_header : { false, true })
+    {
+        test_options options;
+        options.loop = loop_kind::while_loop;
+        options.ivdep_length = std::get<0>(param);
+        options.offset1 = std::get<1>(param);
+        options.offset2 = std::get<2>(param);
+        options.iter_count = iter_count;
+        options.offset1_param = offset1_param;
+        options.offset2_param = offset2_param;
+        options.iter_count_param = iter_count_param;
+        options.cond_in_header = cond_in_header;
+        options.init_in_header = false;
+        options.incr_in_header = false;
+        error = test(device, context, queue, options);
+        RETURN_ON_ERROR(error)
+    }
+    return error;
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    int error = CL_SUCCESS;
+    for (auto param : params)
+    for (auto iter_count : iter_counts)
+    for (bool offset1_param : { false, true })
+    for (bool offset2_param : { false, true })
+    for (bool iter_count_param : { false, true })
+    for (bool cond_in_header : { false, true })
+    {
+        test_options options;
+        options.loop = loop_kind::do_loop;
+        options.ivdep_length = std::get<0>(param);
+        options.offset1 = std::get<1>(param);
+        options.offset2 = std::get<2>(param);
+        options.iter_count = iter_count;
+        options.offset1_param = offset1_param;
+        options.offset2_param = offset2_param;
+        options.iter_count_param = iter_count_param;
+        options.cond_in_header = cond_in_header;
+        options.init_in_header = false;
+        options.incr_in_header = false;
+        error = test(device, context, queue, options);
+        RETURN_ON_ERROR(error)
+    }
+    return error;
+} // namespace
diff --git a/test_conformance/clcpp/attributes/test_max_size.hpp b/test_conformance/clcpp/attributes/test_max_size.hpp
new file mode 100644
index 0000000..15e7ead
--- /dev/null
+++ b/test_conformance/clcpp/attributes/test_max_size.hpp
@@ -0,0 +1,266 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <sstream>
+#include <string>
+#include <vector>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+namespace test_max_size {
+enum class address_space
+    constant,
+    local
+enum class param_kind
+    ptr_type, // constant_ptr<T>
+    ptr,      // constant<T>*
+    ref       // constant<T>&
+const param_kind param_kinds[] =
+    param_kind::ptr_type,
+    param_kind::ptr,
+    param_kind::ref
+struct test_options
+    address_space space;
+    int max_size;
+    bool spec_const;
+    param_kind kind;
+    bool array;
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+std::string generate_source(test_options options)
+    std::stringstream s;
+    s << "kernel void test(";
+    s << ( == address_space::constant ? "constant" : "local");
+    s << " int2 *input) { }" << std::endl;
+    return s.str();
+std::string generate_source(test_options options)
+    std::string type_str = "int2";
+    if (options.array)
+        type_str += "[]";
+    std::stringstream s;
+    s << "#include <opencl_memory>" << std::endl;
+    if (options.spec_const)
+    {
+        s << "#include <opencl_spec_constant>" << std::endl;
+        s << "cl::spec_constant<int, 1> max_size_spec{ 1234567890 };" << std::endl;
+    }
+    s << "kernel void test(";
+    s << "[[cl::max_size(" << (options.spec_const ? "max_size_spec" : std::to_string(options.max_size)) << ")]] ";
+    s << ( == address_space::constant ? "cl::constant" : "cl::local");
+    if (options.kind == param_kind::ptr_type)
+        s << "_ptr<" << type_str << ">";
+    else if (options.kind == param_kind::ptr)
+        s << "<" << type_str << ">*";
+    else if (options.kind == param_kind::ref)
+        s << "<" << type_str << ">&";
+    s << " input) { }" << std::endl;
+    return s.str();
+int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
+    int error = CL_SUCCESS;
+    cl_program program;
+    cl_kernel kernel;
+    std::string kernel_name = "test";
+    std::string source = generate_source(options);
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name, "", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+    const char *source_c_str = source.c_str();
+    error = create_openclcpp_program(context, &program, 1, &source_c_str, "");
+    RETURN_ON_ERROR(error)
+    if (options.spec_const)
+    {
+        error = clSetProgramSpecializationConstant(program, 1, sizeof(cl_int), &options.max_size);
+        RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    }
+    error = build_program_create_kernel_helper(
+        context, &program, &kernel, 1, &source_c_str, kernel_name.c_str()
+    );
+    RETURN_ON_ERROR(error)
+    const int max_size = options.max_size;
+    const int sizes[] = {
+        1,
+        max_size / 2,
+        max_size,
+        max_size + 1,
+        max_size * 2
+    };
+    for (int size : sizes)
+    {
+        cl_mem const_buffer;
+        if ( == address_space::constant)
+        {
+            const_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &error);
+            RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+            error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &const_buffer);
+            // Check the status later (depending on size and max_size values)
+        }
+        else if ( == address_space::local)
+        {
+            error = clSetKernelArg(kernel, 0, size, NULL);
+            // Check the status later (depending on size and max_size values)
+        }
+        if (size <= max_size)
+        {
+            // Correct value, must not fail
+            RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+            const size_t global_size = 123;
+            error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
+            RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+            error = clFinish(queue);
+            RETURN_ON_CL_ERROR(error, "clFinish")
+        }
+        else
+        {
+            // Incorrect value, must fail
+            if (error != CL_MAX_SIZE_RESTRICTION_EXCEEDED)
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "clSetKernelArg must fail with CL_MAX_SIZE_RESTRICTION_EXCEEDED,"
+                    " but returned %s (%d)", get_cl_error_string(error).c_str(), error
+                );
+            }
+        }
+        if ( == address_space::constant)
+        {
+            error = clReleaseMemObject(const_buffer);
+            RETURN_ON_CL_ERROR(error, "clReleaseMemObject")
+        }
+    }
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    int error = CL_SUCCESS;
+    cl_ulong max_size;
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(max_size), &max_size, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+    for (bool spec_const : { false, true })
+    for (auto kind : param_kinds)
+    for (bool array : { false, true })
+    {
+        test_options options;
+ = address_space::constant;
+        options.max_size = max_size / 2;
+        options.spec_const = spec_const;
+        options.kind = kind;
+        options.array = array;
+        error = test(device, context, queue, options);
+        RETURN_ON_ERROR(error)
+    }
+    return error;
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    int error = CL_SUCCESS;
+    cl_ulong max_size;
+    error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_size), &max_size, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+    for (bool spec_const : { false, true })
+    for (auto kind : param_kinds)
+    for (bool array : { false, true })
+    {
+        test_options options;
+ = address_space::local;
+        options.max_size = max_size / 2;
+        options.spec_const = spec_const;
+        options.kind = kind;
+        options.array = array;
+        error = test(device, context, queue, options);
+        RETURN_ON_ERROR(error)
+    }
+    return error;
+} // namespace
diff --git a/test_conformance/clcpp/attributes/test_required_num_sub_groups.hpp b/test_conformance/clcpp/attributes/test_required_num_sub_groups.hpp
new file mode 100644
index 0000000..2380eaf
--- /dev/null
+++ b/test_conformance/clcpp/attributes/test_required_num_sub_groups.hpp
@@ -0,0 +1,285 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <sstream>
+#include <string>
+#include <vector>
+#include <random>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+namespace test_required_num_sub_groups {
+struct test_options
+    size_t num_sub_groups;
+    bool spec_const;
+    size_t max_count;
+    size_t num_tests;
+struct output_type
+    cl_ulong num_sub_groups;
+    cl_ulong enqueued_num_sub_groups;
+const std::string source_common = R"(
+struct output_type
+    ulong num_sub_groups;
+    ulong enqueued_num_sub_groups;
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+std::string generate_source(test_options options)
+    std::stringstream s;
+    s << source_common;
+    s << R"(
+    #pragma OPENCL EXTENSION cl_khr_subgroups : enable
+    kernel void test(global struct output_type *output)
+    {
+        const ulong gid = get_global_linear_id();
+        output[gid].num_sub_groups = get_num_sub_groups();
+        output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups();
+    }
+    )";
+    return s.str();
+std::string generate_source(test_options options)
+    std::stringstream s;
+    s << R"(
+    #include <opencl_memory>
+    #include <opencl_work_item>
+    using namespace cl;
+    )";
+    if (options.spec_const)
+    {
+        s << "#include <opencl_spec_constant>" << std::endl;
+        s << "cl::spec_constant<uint, 1> num_sub_groups_spec{ 1234567890 };" << std::endl;
+    }
+    s << source_common << std::endl;
+    s << "[[cl::required_num_sub_groups(" << (options.spec_const ? "num_sub_groups_spec" : std::to_string(options.num_sub_groups)) << ")]]";
+    s << R"(
+    kernel void test(global_ptr<output_type[]> output)
+    {
+        const ulong gid = get_global_linear_id();
+        output[gid].num_sub_groups = get_num_sub_groups();
+        output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups();
+    }
+    )";
+    return s.str();
+int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
+    int error = CL_SUCCESS;
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    if (!is_extension_available(device, "cl_khr_subgroups"))
+    {
+        log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. Skipping tests.\n");
+        return CL_SUCCESS;
+    }
+    cl_program program;
+    cl_kernel kernel;
+    std::string kernel_name = "test";
+    std::string source = generate_source(options);
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name, "-cl-std=CL2.0", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+    const char *source_c_str = source.c_str();
+    error = create_openclcpp_program(context, &program, 1, &source_c_str, "");
+    RETURN_ON_ERROR(error)
+    if (options.spec_const)
+    {
+        cl_uint spec_num_sub_groups = static_cast<cl_uint>(options.num_sub_groups);
+        error = clSetProgramSpecializationConstant(program, 1, sizeof(cl_uint), &spec_num_sub_groups);
+        RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant")
+    }
+    error = build_program_create_kernel_helper(
+        context, &program, &kernel, 1, &source_c_str, kernel_name.c_str()
+    );
+    RETURN_ON_ERROR(error)
+    size_t compile_num_sub_groups;
+    error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_COMPILE_NUM_SUB_GROUPS,
+        0, NULL,
+        sizeof(size_t), &compile_num_sub_groups, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
+    if (compile_num_sub_groups != options.num_sub_groups)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "CL_KERNEL_COMPILE_NUM_SUB_GROUPS did not return correct value (expected %lu, got %lu)",
+            options.num_sub_groups, compile_num_sub_groups
+        )
+    }
+    cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * options.max_count, NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<size_t> count_dis(1, options.max_count);
+    for (size_t test = 0; test < options.num_tests; test++)
+    {
+        for (size_t dim = 1; dim <= 3; dim++)
+        {
+            size_t global_size[3] = { 1, 1, 1 };
+            size_t count = count_dis(gen);
+            std::uniform_int_distribution<size_t> global_size_dis(1, static_cast<size_t>(pow(count, 1.0 / dim)));
+            for (size_t d = 0; d < dim; d++)
+            {
+                global_size[d] = global_size_dis(gen);
+            }
+            count = global_size[0] * global_size[1] * global_size[2];
+            size_t local_size[3] = { 1, 1, 1 };
+            error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
+                sizeof(size_t), &options.num_sub_groups,
+                sizeof(size_t) * dim, local_size, NULL);
+            RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
+            if (local_size[0] == 0 || local_size[1] != 1 || local_size[2] != 1)
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT did not return correct value"
+                )
+            }
+            size_t sub_group_count_for_ndrange;
+            error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
+                sizeof(size_t) * dim, local_size,
+                sizeof(size_t), &sub_group_count_for_ndrange, NULL);
+            RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
+            if (sub_group_count_for_ndrange != options.num_sub_groups)
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE did not return correct value (expected %lu, got %lu)",
+                    options.num_sub_groups, sub_group_count_for_ndrange
+                )
+            }
+            const char pattern = 0;
+            error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL);
+            RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer")
+            error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, global_size, local_size, 0, NULL, NULL);
+            RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+            std::vector<output_type> output(count);
+            error = clEnqueueReadBuffer(
+                queue, output_buffer, CL_TRUE,
+                0, sizeof(output_type) * count,
+                static_cast<void *>(,
+                0, NULL, NULL
+            );
+            RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+            for (size_t gid = 0; gid < count; gid++)
+            {
+                const output_type &o = output[gid];
+                if (o.enqueued_num_sub_groups != options.num_sub_groups)
+                {
+                    RETURN_ON_ERROR_MSG(-1, "get_enqueued_num_sub_groups does not equal to required_num_sub_groups")
+                }
+                if (o.num_sub_groups > options.num_sub_groups)
+                {
+                    RETURN_ON_ERROR_MSG(-1, "get_num_sub_groups did not return correct value")
+                }
+            }
+        }
+    }
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    int error = CL_SUCCESS;
+    cl_uint max_num_sub_groups;
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_NUM_SUB_GROUPS, sizeof(max_num_sub_groups), &max_num_sub_groups, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+    for (bool spec_const : { false, true })
+    for (size_t num_sub_groups = 1; num_sub_groups <= max_num_sub_groups; num_sub_groups++)
+    {
+        test_options options;
+        options.spec_const = spec_const;
+        options.num_sub_groups = num_sub_groups;
+        options.num_tests = 100;
+        options.max_count = num_elements;
+        error = test(device, context, queue, options);
+        RETURN_ON_ERROR(error)
+    }
+    return error;
+} // namespace
diff --git a/test_conformance/clcpp/common.hpp b/test_conformance/clcpp/common.hpp
new file mode 100644
index 0000000..e062002
--- /dev/null
+++ b/test_conformance/clcpp/common.hpp
@@ -0,0 +1,51 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cmath>
+#include <sys/types.h>
+#include <sys/stat.h>
+// harness framework
+#include "harness/compat.h"
+#include "harness/testHarness.h"
+#include "harness/errorHelpers.h"
+#include "harness/kernelHelpers.h"
+// autotest
+#include "autotest/autotest.hpp"
+// utils_common
+#include "utils_common/is_vector_type.hpp"
+#include "utils_common/scalar_type.hpp"
+#include "utils_common/make_vector_type.hpp"
+#include "utils_common/type_name.hpp"
+#include "utils_common/type_supported.hpp"
+#include "utils_common/vector_size.hpp"
+#include "utils_common/kernel_helpers.hpp"
+#include "utils_common/errors.hpp"
+#include "utils_common/string.hpp"
+// Rounds global_size up to the nearest multiple of local_size, so that an
+// NDRange of the returned size divides evenly into work-groups of local_size.
+// Uses integer arithmetic only: going through double and std::ceil cannot
+// represent every size_t above 2^53 and could yield a result smaller than
+// global_size. Marked inline because the definition lives in a header.
+// local_size must be non-zero.
+inline size_t get_uniform_global_size(size_t global_size, size_t local_size)
+    return (global_size / local_size + (global_size % local_size != 0 ? 1 : 0)) * local_size;
diff --git a/test_conformance/clcpp/common_funcs/CMakeLists.txt b/test_conformance/clcpp/common_funcs/CMakeLists.txt
new file mode 100644
index 0000000..5e4d8b0
--- /dev/null
+++ b/test_conformance/clcpp/common_funcs/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/common_funcs/common_funcs.hpp b/test_conformance/clcpp/common_funcs/common_funcs.hpp
new file mode 100644
index 0000000..d6f8c89
--- /dev/null
+++ b/test_conformance/clcpp/common_funcs/common_funcs.hpp
@@ -0,0 +1,417 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+#include <type_traits>
+#include <algorithm>
+// floatn clamp(floatn x, floatn min, floatn max) (only scalars)
+//
+// Host-side reference for clamp(): operator() computes
+// min(max(x, minval), maxval). All template types must be identical.
+// NOTE(review): the struct's opening/closing brace lines are not present in this text.
+template<class IN1, class IN2, class IN3, class OUT1>
+struct common_func_clamp : public ternary_func<IN1, IN2, IN3, OUT1>
+    // Name of the function under test.
+    std::string str()
+    {
+        return "clamp";
+    }
+    // Include line(s) returned for the function under test.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+    // Reference result for one (x, minval, maxval) triple.
+    OUT1 operator()(const IN1& x, const IN2& minval, const IN3& maxval)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value
+                && std::is_same<IN2, IN3>::value
+                && std::is_same<IN3, OUT1>::value,
+            "All types must be the same"
+        );
+        // Parenthesised names keep min/max from being expanded as macros.
+        return (std::min)((std::max)(x, minval), maxval);
+    }
+    // min2()/max2() and min3()/max3() presumably bound the generated values of
+    // the 2nd (minval) and 3rd (maxval) argument - TODO confirm against the
+    // input generator. max2() is max/4000 and min3() is 1 + max/4000, so minval
+    // never exceeds maxval; for floating-point types the "+ 1" is absorbed by
+    // rounding, so the two bounds can be equal.
+    IN2 min2()
+    {
+        // For floating-point types numeric_limits::min() is the smallest
+        // positive normal value, not the most negative one.
+        return (std::numeric_limits<IN2>::min)();
+    }
+    IN2 max2()
+    {
+        return (std::numeric_limits<IN2>::max)() / IN2(4000.0f);
+    }
+    IN3 min3()
+    {
+        return IN3(1) + ((std::numeric_limits<IN3>::max)() / IN3(4000.0f));
+    }
+    IN3 max3()
+    {
+        return (std::numeric_limits<IN3>::max)() / IN3(2000.0f);
+    }
+    // presumably the tolerated error in ULPs; 0 would mean an exact match - TODO confirm
+    float ulp()
+    {
+        return 0.0f;
+    }
+// floatn degrees(floatn t)
+//
+// Host-side reference for degrees(): converts radians to degrees as
+// (180 / pi) * x, evaluated and returned in the REFERENCE type.
+// NOTE(review): the struct's opening/closing brace lines are not present in this text.
+template<class IN1, class OUT1, class REFERENCE>
+struct common_func_degrees : public unary_func<IN1, OUT1>
+    // Name of the function under test.
+    std::string str()
+    {
+        return "degrees";
+    }
+    // Include line(s) returned for the function under test.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+    // Reference result; note the return type is REFERENCE, not OUT1.
+    REFERENCE operator()(const IN1& x)
+    {
+        static_assert(
+            std::is_same<IN1, OUT1>::value,
+            "All types must be the same"
+        );
+        return (REFERENCE(180.0) / CL_M_PI) * static_cast<REFERENCE>(x);
+    }
+    // presumably the tolerated error in ULPs - TODO confirm against the verifier
+    float ulp()
+    {
+        return 2.5f;
+    }
+// floatn max(floatn x, floatn y)
+//
+// Host-side reference for max(): operator() returns std::max(x, y).
+// All template types must be identical.
+// NOTE(review): the struct's opening/closing brace lines are not present in this text.
+template<class IN1, class IN2, class OUT1>
+struct common_func_max : public binary_func<IN1, IN2, OUT1>
+    // Name of the function under test.
+    std::string str()
+    {
+        return "max";
+    }
+    // Include line(s) returned for the function under test.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+    // Reference result for one (x, y) pair.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
+            "All types must be the same"
+        );
+        // Parenthesised name keeps max from being expanded as a macro.
+        return (std::max)(x, y);
+    }
+    // presumably the tolerated error in ULPs; 0 would mean an exact match - TODO confirm
+    float ulp()
+    {
+        return 0.0f;
+    }
+// floatn min(floatn x, floatn y)
+//
+// Host-side reference for min(): operator() returns std::min(x, y).
+// All template types must be identical.
+// NOTE(review): the struct's opening/closing brace lines are not present in this text.
+template<class IN1, class IN2, class OUT1>
+struct common_func_min : public binary_func<IN1, IN2, OUT1>
+    // Name of the function under test.
+    std::string str()
+    {
+        return "min";
+    }
+    // Include line(s) returned for the function under test.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+    // Reference result for one (x, y) pair.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
+            "All types must be the same"
+        );
+        // Parenthesised name keeps min from being expanded as a macro.
+        return (std::min)(x, y);
+    }
+    // presumably the tolerated error in ULPs; 0 would mean an exact match - TODO confirm
+    float ulp()
+    {
+        return 0.0f;
+    }
+// floatn mix(floatn x, floatn y, floatn a);
+//
+// Host-side reference for mix(): linear blend x + (y - x) * a, evaluated in
+// double and converted to OUT1 on return. All template types must be identical.
+// NOTE(review): the struct's opening/closing brace lines are not present in this text.
+template<class IN1, class IN2, class IN3, class OUT1>
+struct common_func_mix : public ternary_func<IN1, IN2, IN3, OUT1>
+    // Name of the function under test.
+    std::string str()
+    {
+        return "mix";
+    }
+    // Include line(s) returned for the function under test.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+    // Reference result for one (x, y, a) triple.
+    OUT1 operator()(const IN1& x, const IN2& y, const IN3& a)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value
+                && std::is_same<IN2, IN3>::value
+                && std::is_same<IN3, OUT1>::value,
+            "All types must be the same"
+        );
+        // The double result is implicitly narrowed to OUT1 by the return.
+        return static_cast<double>(x) + ((static_cast<double>(y) - static_cast<double>(x)) * static_cast<double>(a));
+    }
+    // min3()/max3() presumably bound the generated blend factor a - TODO confirm.
+    // The values keep it inside [0 + eps, 1 - eps].
+    IN3 min3()
+    {
+        return IN3(0.0f + CL_FLT_EPSILON);
+    }
+    IN3 max3()
+    {
+        return IN3(1.0f - CL_FLT_EPSILON);
+    }
+    // presumably switches the verifier away from ULP-based comparison - TODO confirm
+    bool use_ulp()
+    {
+        return false;
+    }
+// floatn radians(floatn t)
+//
+// Host-side reference for radians(): converts degrees to radians as
+// (pi / 180) * x, evaluated and returned in the REFERENCE type.
+// NOTE(review): the struct's opening/closing brace lines are not present in this text.
+template<class IN1, class OUT1, class REFERENCE>
+struct common_func_radians : public unary_func<IN1, OUT1>
+    // Name of the function under test.
+    std::string str()
+    {
+        return "radians";
+    }
+    // Include line(s) returned for the function under test.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+    // Reference result; note the return type is REFERENCE, not OUT1.
+    REFERENCE operator()(const IN1& x)
+    {
+        static_assert(
+            std::is_same<IN1, OUT1>::value,
+            "All types must be the same"
+        );
+        return (CL_M_PI / REFERENCE(180.0)) * static_cast<REFERENCE>(x);
+    }
+    // presumably the tolerated error in ULPs - TODO confirm against the verifier
+    float ulp()
+    {
+        return 2.5f;
+    }
+// floatn step(floatn edge, floatn x)
+//
+// Host-side reference for step(): 0.0 when x < edge, otherwise 1.0.
+// All template types must be identical.
+// NOTE(review): the struct's opening/closing brace lines are not present in this text.
+template<class IN1, class IN2, class OUT1>
+struct common_func_step : public binary_func<IN1, IN2, OUT1>
+    // Name of the function under test.
+    std::string str()
+    {
+        return "step";
+    }
+    // Include line(s) returned for the function under test.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+    // Reference result for one (edge, x) pair.
+    OUT1 operator()(const IN1& edge, const IN2& x)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
+            "All types must be the same"
+        );
+        // x == edge falls through to 1.0.
+        if(x < edge)
+            return OUT1(0.0f);
+        return OUT1(1.0f);
+    }
+    // presumably the tolerated error in ULPs; 0 would mean an exact match - TODO confirm
+    float ulp()
+    {
+        return 0.0f;
+    }
+// floatn smoothstep(floatn edge0, floatn edge1, floatn x);
+//
+// Host-side reference for smoothstep(): 0.0 for x <= edge0, 1.0 for
+// x >= edge1, otherwise the Hermite polynomial t*t*(3 - 2*t) with
+// t = (x - edge0) / (edge1 - edge0). All template types must be identical.
+// NOTE(review): the struct's opening/closing brace lines are not present in this text.
+template<class IN1, class IN2, class IN3, class OUT1>
+struct common_func_smoothstep : public ternary_func<IN1, IN2, IN3, OUT1>
+    // Name of the function under test.
+    std::string str()
+    {
+        return "smoothstep";
+    }
+    // Include line(s) returned for the function under test.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+    // Reference result for one (edge0, edge1, x) triple.
+    OUT1 operator()(const IN1& edge0, const IN2& edge1, const IN3& x)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value
+                && std::is_same<IN2, IN3>::value
+                && std::is_same<IN3, OUT1>::value,
+            "All types must be the same"
+        );
+        if(x <= edge0)
+        {
+            return OUT1(0.0f);
+        }
+        if(x >= edge1)
+        {
+            return OUT1(1.0f);
+        }
+        OUT1 t = (x - edge0) / (edge1 - edge0);
+        t = t * t * (3.0f - 2.0f * t);
+        return t;
+    }
+    // edge0 must be < edge1
+    // min1()/max1() and min2()/max2() presumably bound the generated edge0 and
+    // edge1 values - TODO confirm. max1() is max/8000 and min2() is
+    // 1 + max/4000, which keeps edge0 below edge1.
+    IN1 min1()
+    {
+        return (std::numeric_limits<IN1>::min)();
+    }
+    IN1 max1()
+    {
+        return (std::numeric_limits<IN1>::max)() / IN1(8000.0f);
+    }
+    IN2 min2()
+    {
+        // The offset is built in IN2, the type this bound is returned as.
+        return IN2(1) + ((std::numeric_limits<IN2>::max)() / IN2(4000.0f));
+    }
+    IN2 max2()
+    {
+        return (std::numeric_limits<IN2>::max)() / IN2(2000.0f);
+    }
+    // presumably switches the verifier away from ULP-based comparison - TODO confirm
+    bool use_ulp()
+    {
+        return false;
+    }
+// floatn sign(floatn t)
+//
+// Host-side reference for sign(): 1.0 for x > 0, -1.0 for x < 0, a zero is
+// returned with the sign it came in with, and NaN maps to 0.0 (the results the
+// OpenCL common function sign() is specified to give).
+// NOTE(review): the struct's opening/closing brace lines are not present in this text.
+template<class IN1, class OUT1>
+struct common_func_sign : public unary_func<IN1, OUT1>
+    // Name of the function under test.
+    std::string str()
+    {
+        return "sign";
+    }
+    // Include line(s) returned for the function under test.
+    std::string headers()
+    {
+        return "#include <opencl_common>\n";
+    }
+    // Reference result for one input value.
+    OUT1 operator()(const IN1& x)
+    {
+        static_assert(
+            std::is_same<IN1, OUT1>::value,
+            "All types must be the same"
+        );
+        // NaN is the only value that compares unequal to itself; sign(NaN) is 0.0.
+        if(x != x)
+        {
+            return IN1(0.0f);
+        }
+        // -0.0 == +0.0 compares true, so comparing against each zero
+        // separately cannot tell them apart. One test covers both, and
+        // returning x itself keeps the sign bit of the input.
+        if(x == IN1(0.0f))
+        {
+            return x;
+        }
+        if(x > IN1(0.0f))
+        {
+            return IN1(1.0f);
+        }
+        return IN1(-1.0f);
+    }
+    // presumably switches the verifier away from ULP-based comparison - TODO confirm
+    bool use_ulp()
+    {
+        return false;
+    }
+    // presumably the tolerated error in ULPs; 0 would mean an exact match - TODO confirm
+    float ulp()
+    {
+        return 0.0f;
+    }
+    // Extra inputs: both signed zeros.
+    // presumably added to the generated input set - TODO confirm
+    std::vector<IN1> in_special_cases()
+    {
+        return { -0.0f, +0.0f };
+    }
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    // Test-case body: runs every common-function reference functor, instantiated
+    // for cl_float, through the TEST_*_FUNC_MACRO helpers (defined elsewhere).
+    // NOTE(review): the line naming this test case and the function's outer
+    // braces are not present in this text.
+    int error = CL_SUCCESS;
+    // Not referenced directly below; presumably written by the macros - TODO confirm.
+    int last_error = CL_SUCCESS;
+    // floatn clamp(floatn x, floatn min, floatn max)
+    TEST_TERNARY_FUNC_MACRO((common_func_clamp<cl_float, cl_float, cl_float, cl_float>()))  
+    // floatn degrees(floatn t)
+    TEST_UNARY_FUNC_MACRO((common_func_degrees<cl_float, cl_float, cl_double>()))  
+    // floatn max(floatn x, floatn y);
+    TEST_BINARY_FUNC_MACRO((common_func_max<cl_float, cl_float, cl_float>()))
+    // floatn min(floatn x, floatn y);
+    TEST_BINARY_FUNC_MACRO((common_func_min<cl_float, cl_float, cl_float>()))
+    // floatn mix(floatn x, floatn y, floatn a);
+    TEST_TERNARY_FUNC_MACRO((common_func_mix<cl_float, cl_float, cl_float, cl_float>()))
+    // floatn radians(floatn t)
+    TEST_UNARY_FUNC_MACRO((common_func_radians<cl_float, cl_float, cl_double>()))
+    // floatn step(floatn edge, floatn x)
+    TEST_BINARY_FUNC_MACRO((common_func_step<cl_float, cl_float, cl_float>()))
+    // floatn smoothstep(floatn edge0, floatn edge1, floatn x)
+    TEST_TERNARY_FUNC_MACRO((common_func_smoothstep<cl_float, cl_float, cl_float, cl_float>()))
+    // floatn sign(floatn t);
+    TEST_UNARY_FUNC_MACRO((common_func_sign<cl_float, cl_float>()))
+    // Any accumulated failure is reported as -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
diff --git a/test_conformance/clcpp/common_funcs/main.cpp b/test_conformance/clcpp/common_funcs/main.cpp
new file mode 100644
index 0000000..a66d8f2
--- /dev/null
+++ b/test_conformance/clcpp/common_funcs/main.cpp
@@ -0,0 +1,43 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <limits>
+#include "../common.hpp"
+#include "common_funcs.hpp"
+// Entry point of the common_funcs test binary: checks the host floating-point
+// types, then hands every test registered in the global autotest suite to the
+// test harness and returns the harness' result.
+int main(int argc, const char *argv[])
+    // Check if cl_float (float) and cl_double (double) fulfill the requirements of
+    // IEC 559 (IEEE 754) standard. This is required for the tests to run correctly.
+    if(!std::numeric_limits<cl_float>::is_iec559)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "cl_float (float) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
+            "Tests won't run correctly."
+        );
+    }
+    if(!std::numeric_limits<cl_double>::is_iec559)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "cl_double (double) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
+            "Tests won't run correctly."
+        );
+    }
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    // The harness needs the test definitions themselves as well as their count.
+    // test_defs is assumed to be a contiguous container exposing data().
+    return runTestHarness(argc, argv, tests.size(), tests.data(), false, false, 0);
diff --git a/test_conformance/clcpp/convert/CMakeLists.txt b/test_conformance/clcpp/convert/CMakeLists.txt
new file mode 100644
index 0000000..9f69fea
--- /dev/null
+++ b/test_conformance/clcpp/convert/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/convert/convert_cast.hpp b/test_conformance/clcpp/convert/convert_cast.hpp
new file mode 100644
index 0000000..81fcca6
--- /dev/null
+++ b/test_conformance/clcpp/convert/convert_cast.hpp
@@ -0,0 +1,309 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+#include <functional>
+// Rounding mode requested from the conversion under test.
+// NOTE(review): the enum's brace lines are not present in this text.
+enum class rounding_mode
+    // No explicit rounding mode is named in the generated conversion call.
+    def,
+    /*rte, not implemented here */
+    // Named "rtz" in generated kernels; the host reference uses a plain static_cast.
+    rtz,
+    // Named "rtp"; the host reference uses std::ceil for integral results.
+    rtp,
+    // Named "rtn"; the host reference uses std::floor for integral results.
+    rtn
+// Saturation setting of the conversion under test: def leaves it unspecified
+// in the generated call, off/on name it explicitly.
+enum class saturate { def, off, on };
+// Returns the textual suffix for a rounding mode ("rtz", "rtp" or "rtn");
+// every other value, including rounding_mode::def, yields an empty string.
+// NOTE(review): the function's outer braces are not present in this text.
+std::string rounding_mode_name(rounding_mode rmode)
+    switch (rmode)
+    {
+        case rounding_mode::rtz: return "rtz";
+        case rounding_mode::rtp: return "rtp";
+        case rounding_mode::rtn: return "rtn";
+        default: return "";
+    }
+// Returns "off" or "on" for an explicit saturation setting; every other value,
+// including saturate::def, yields an empty string.
+// NOTE(review): the function's outer braces are not present in this text.
+std::string saturate_name(saturate smode)
+    switch (smode)
+    {
+        case saturate::off: return "off";
+        case saturate::on:  return "on";
+        default: return "";
+    }
+// Limits x to the range [a, b]: returns min(b, max(a, x)).
+// NOTE(review): the function's outer braces are not present in this text.
+template<class T>
+T clamp(T x, T a, T b)
+    // Parenthesised names keep min/max from being expanded as macros.
+    return (std::min)(b, (std::max)(a, x));
+// Host-side reference for the convert_cast conversion between two vector types
+// with the same number of elements. Holds the input range plus the rounding
+// and saturation modes; operator() converts one vector element by element.
+// NOTE(review): the struct's opening/closing brace lines are not present in this text.
+template<class IN1, class OUT1>
+struct convert_cast : public unary_func<IN1, OUT1>
+    static_assert(vector_size<IN1>::value == vector_size<OUT1>::value, "The operand and result type must have the same number of elements");
+    // Element types of the input and output vectors.
+    typedef typename scalar_type<IN1>::type in_scalar_type;
+    typedef typename scalar_type<OUT1>::type out_scalar_type;
+    // Range reported through min1()/max1().
+    in_scalar_type in_min;
+    in_scalar_type in_max;
+    rounding_mode rmode;
+    saturate smode;
+    // Stores the input range and the two conversion modes.
+    convert_cast(in_scalar_type min, in_scalar_type max, rounding_mode rmode, saturate smode)
+        : in_min(min), in_max(max), rmode(rmode), smode(smode)
+    {
+    }
+    // Name of the function under test.
+    std::string str()
+    {
+        return "convert_cast";
+    }
+    // Include line(s) returned for the function under test.
+    std::string headers()
+    {
+        return "#include <opencl_convert>\n";
+    }
+    // Lower input bound; detail::def_limit presumably broadcasts the scalar to
+    // every vector element - TODO confirm.
+    IN1 min1()
+    {
+        return detail::def_limit<IN1>(in_min);
+    }
+    // Upper input bound, built the same way as min1().
+    IN1 max1()
+    {
+        return detail::def_limit<IN1>(in_max);
+    }
+    // Reference conversion of one input vector.
+    OUT1 operator()(const IN1& x)
+    {
+        OUT1 y;
+        for (size_t i = 0; i < vector_size<IN1>::value; i++)
+        {
+            in_scalar_type v;
+            // With saturation on, the value is first limited to the output
+            // type's numeric_limits min/max, expressed in the input type.
+            // NOTE(review): for a floating-point output type numeric_limits::min()
+            // is the smallest positive value, not the lowest - confirm this path
+            // is only used with integer outputs.
+            if (smode == saturate::on)
+                v = clamp(x.s[i],
+                    static_cast<in_scalar_type>((std::numeric_limits<out_scalar_type>::min)()),
+                    static_cast<in_scalar_type>((std::numeric_limits<out_scalar_type>::max)())
+                );
+            else
+                v = x.s[i];
+            // Rounding modes only matter when the result is an integer type.
+            if (std::is_integral<out_scalar_type>::value)
+            {
+                switch (rmode)
+                {
+                    case rounding_mode::rtp:
+                        y.s[i] = static_cast<out_scalar_type>(std::ceil(v));
+                        break;
+                    case rounding_mode::rtn:
+                        y.s[i] = static_cast<out_scalar_type>(std::floor(v));
+                        break;
+                    // def and rtz: plain static_cast.
+                    default:
+                        y.s[i] = static_cast<out_scalar_type>(v);
+                }
+            }
+            else
+            {
+                y.s[i] = static_cast<out_scalar_type>(v);
+            }
+        }
+        return y;
+    }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Builds the source of an OpenCL C kernel named "test_" + func.str() that
+// writes convert_<out_type>[_sat][_<rounding mode>](input[gid]) to output[gid].
+// "_sat" is appended only for saturate::on, and the rounding suffix only when
+// the mode is not rounding_mode::def.
+// NOTE(review): the function's outer braces are not present in this text.
+template <class func_type, class in_type, class out_type>
+std::string generate_kernel_convert_cast(func_type func)
+    std::string in1_value = "input[gid]";
+    std::string function_call = "convert_" + type_name<out_type>();
+    if (func.smode == saturate::on)
+        function_call += "_sat";
+    if (func.rmode != rounding_mode::def)
+        function_call += "_" + rounding_mode_name(func.rmode);
+    function_call += "(" + in1_value + ")";
+    return
+        "__kernel void test_" + func.str() + "(global " + type_name<in_type>() + " *input, global " + type_name<out_type>() + " *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    output[gid] = " + function_call + ";\n"
+        "}\n";
+// Builds the source of an OpenCL C++ kernel named "test_" + func.str() that
+// writes convert_cast<out_type[, rounding_mode::X][, saturate::Y]>(input[gid])
+// to output[gid]. The rounding and saturation template arguments are emitted
+// only when they differ from their def values. func.defs() and func.headers()
+// are placed in front of the kernel.
+// NOTE(review): this has the same name and signature as the definition just
+// before it; presumably the two are alternative preprocessor branches whose
+// #else is not present in this text. The outer braces are missing as well.
+template <class func_type, class in_type, class out_type>
+std::string generate_kernel_convert_cast(func_type func)
+    std::string headers = func.headers();
+    std::string in1_value = "input[gid]";
+    std::string function_call = "convert_cast<" + type_name<out_type>();
+    if (func.rmode != rounding_mode::def)
+        function_call += ", rounding_mode::" + rounding_mode_name(func.rmode);
+    if (func.smode != saturate::def)
+        function_call += ", saturate::" + saturate_name(func.smode);
+    function_call += ">(" + in1_value + ")";
+    return
+        "" + func.defs() +
+        "" + headers +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_" + func.str() + "(global_ptr<" + type_name<in_type>() +  "[]> input,"
+                                              "global_ptr<" + type_name<out_type>() + "[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    output[gid] = " + function_call + ";\n"
+        "}\n";
+// Runs one convert_cast configuration on the device and checks it against the
+// host reference `op`.
+//
+// Steps: skip (returning CL_SUCCESS) when the device does not support the input
+// or output type; generate and build the kernel; upload `count` generated
+// inputs; run one work-item per element; read the results back; compare them
+// with verify_unary().
+//
+// Returns CL_SUCCESS on success or skip; failures are reported through the
+// RETURN_ON_* macros (defined elsewhere).
+// NOTE(review): the function's outer braces and part of the preprocessor
+// structure around the kernel-creation alternatives are not present in this text.
+template <class convert_cast_op>
+int test_convert_cast_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, convert_cast_op op)
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int error;
+    typedef typename convert_cast_op::in_type INPUT;
+    typedef typename convert_cast_op::out_type OUTPUT;
+    // Don't run test for unsupported types
+    if (!(type_supported<INPUT>(device) && type_supported<OUTPUT>(device)))
+    {
+        return CL_SUCCESS;
+    }
+    std::string code_str = generate_kernel_convert_cast<convert_cast_op, INPUT, OUTPUT>(op);
+    std::string kernel_name("test_"); kernel_name += op.str();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(error)
+    error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(error)
+    std::vector<INPUT> input = generate_input<INPUT>(count, op.min1(), op.max1(), op.in_special_cases());
+    std::vector<OUTPUT> output = generate_output<OUTPUT>(count);
+    buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(INPUT) * input.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(OUTPUT) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    // Blocking upload of the generated inputs; the source pointer is the
+    // input vector's storage.
+    error = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+    error = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    // One work-item per element.
+    work_size[0] = count;
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+    // Blocking read of the device results into the output vector's storage.
+    error = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+    if (!verify_unary(input, output, op))
+    {
+        RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+    }
+    log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    // Test-case body: runs a fixed list of convert_cast configurations (no-op,
+    // int to int, float to int, int to float) and reports -1 if any of them
+    // left a non-success code in `error`.
+    // NOTE(review): the line naming this test case and the function's outer
+    // braces are not present in this text.
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    // NOTE(review): the next five lines read like the body of
+    // TEST_CONVERT_CAST_MACRO(OP); its #define line is not present in this text.
+    last_error = test_convert_cast_func( \
+        device, context, queue, n_elems, OP \
+    ); \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+    // No-op
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_float2, cl_float2>(-100.0f, +100.0f, rounding_mode::rtn, saturate::def)))
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_uchar2, cl_uchar2>(0, 255, rounding_mode::def, saturate::def)))
+    // int to int
+    // NOTE(review): minimum and maximum are both 40000 here - presumably
+    // deliberate, to force saturation of the short result; confirm.
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_int4, cl_short4>(40000, 40000, rounding_mode::def, saturate::on)))
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_uchar8, cl_char8>(0, 127, rounding_mode::def, saturate::off)))
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_char8, cl_int8>(-100, 100, rounding_mode::def, saturate::off)))
+    // float to int
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_float2, cl_uchar2>(-100.0f, +400.0f, rounding_mode::def, saturate::on)))
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_double4, cl_char4>(-127.0, +127.0, rounding_mode::rtp, saturate::off)))
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_float8, cl_uint8>(-1000.0f, +10000.0f, rounding_mode::rtp, saturate::on)))
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_float16, cl_ushort16>(-10000.0f, +70000.0f, rounding_mode::rtn, saturate::on)))
+    // int to float
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_short8, cl_float8>(0, 12345, rounding_mode::def, saturate::def)))
+    TEST_CONVERT_CAST_MACRO((convert_cast<cl_long2, cl_float2>(-1000000, +1000000, rounding_mode::rtz, saturate::def)))
+    // Any accumulated failure is reported as -1.
+    if (error != CL_SUCCESS)
+    {
+        return -1;
+    }
+    return error;
diff --git a/test_conformance/clcpp/convert/main.cpp b/test_conformance/clcpp/convert/main.cpp
new file mode 100644
index 0000000..9f4ed09
--- /dev/null
+++ b/test_conformance/clcpp/convert/main.cpp
@@ -0,0 +1,25 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "convert_cast.hpp"
+// Entry point of the convert test binary: hands every test registered in the
+// global autotest suite to the test harness and returns the harness' result.
+int main(int argc, const char *argv[])
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    // The harness needs the test definitions themselves as well as their count.
+    // test_defs is assumed to be a contiguous container exposing data().
+    return runTestHarness(argc, argv, tests.size(), tests.data(), false, false, 0);
diff --git a/test_conformance/clcpp/device_queue/CMakeLists.txt b/test_conformance/clcpp/device_queue/CMakeLists.txt
new file mode 100644
index 0000000..0e1b2ee
--- /dev/null
+++ b/test_conformance/clcpp/device_queue/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/device_queue/main.cpp b/test_conformance/clcpp/device_queue/main.cpp
new file mode 100644
index 0000000..1075c78
--- /dev/null
+++ b/test_conformance/clcpp/device_queue/main.cpp
@@ -0,0 +1,25 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "test_enqueue.hpp"
+// Entry point of the device_queue test binary: hands every test registered in
+// the global autotest suite to the test harness and returns the harness' result.
+int main(int argc, const char *argv[])
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    // The harness needs the test definitions themselves as well as their count.
+    // test_defs is assumed to be a contiguous container exposing data().
+    return runTestHarness(argc, argv, tests.size(), tests.data(), false, false, 0);
diff --git a/test_conformance/clcpp/device_queue/test_enqueue.hpp b/test_conformance/clcpp/device_queue/test_enqueue.hpp
new file mode 100644
index 0000000..f5d4e6d
--- /dev/null
+++ b/test_conformance/clcpp/device_queue/test_enqueue.hpp
@@ -0,0 +1,699 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <sstream>
+#include <string>
+#include <vector>
+#include <algorithm>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+namespace test_enqueue {
+// Options for one enqueue test run.
+// NOTE(review): the struct's brace lines are not present in this text.
+struct test_options
+    // Index of the kernel variant that generate_source() emits (0, 1, 2, 3, ...).
+    int test;
+// Host-side layout of the result record the test kernels write. It mirrors,
+// field for field, the device-side struct output_type declared in
+// source_common.
+// NOTE(review): the struct's brace lines are not present in this text.
+struct output_type
+    // Set by the kernel from the enqueue_kernel() status (status == CLK_SUCCESS),
+    // or to 1 directly when a variant does not use that enqueue.
+    cl_int enqueue_kernel1_success;
+    cl_int enqueue_kernel2_success;
+    cl_int enqueue_kernel3_success;
+    cl_int enqueue_marker_success;
+    // Results of is_valid_event() on the corresponding event, or 1 when unused.
+    cl_int event1_is_valid;
+    cl_int event2_is_valid;
+    cl_int event3_is_valid;
+    cl_int user_event1_is_valid;
+    cl_int user_event2_is_valid;
+    // Per-work-item markers written by the enqueued child kernels.
+    cl_int values[10000];
+// Device-side source shared by all kernel variants: generate_source() streams
+// it first, ahead of the variant-specific kernel. It declares the device
+// struct output_type, which must stay in step with the host-side struct of the
+// same name.
+// NOTE(review): the line closing this raw string literal is not present in
+// this text.
+const std::string source_common = R"(
+struct output_type
+    int enqueue_kernel1_success;
+    int enqueue_kernel2_success;
+    int enqueue_kernel3_success;
+    int enqueue_marker_success;
+    int event1_is_valid;
+    int event2_is_valid;
+    int event3_is_valid;
+    int user_event1_is_valid;
+    int user_event2_is_valid;
+    int values[10000];
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+std::string generate_source(test_options options)
+    std::stringstream s;
+    s << source_common;
+    if (options.test == 0)
+    {
+        s << R"(
+    kernel void test(queue_t queue, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+        if (gid != 0)
+            return;
+        output->enqueue_kernel2_success = 1;
+        output->enqueue_kernel3_success = 1;
+        output->enqueue_marker_success = 1;
+        output->event2_is_valid = 1;
+        output->event3_is_valid = 1;
+        output->user_event1_is_valid = 1;
+        output->user_event2_is_valid = 1;
+        queue_t default_queue = get_default_queue();
+        ndrange_t ndrange1 = ndrange_1D(get_global_size(0));
+        clk_event_t event1;
+        int status1 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange1, 0, NULL, &event1,
+        ^{
+            const ulong gid = get_global_id(0);
+            output->values[gid] = 1;
+        });
+        output->enqueue_kernel1_success = status1 == CLK_SUCCESS;
+        output->event1_is_valid = is_valid_event(event1);
+        release_event(event1);
+    }
+    )";
+    }
+    else if (options.test == 1)
+    {
+        s << R"(
+    kernel void test(queue_t queue, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+        if (gid != 0)
+            return;
+        output->enqueue_kernel3_success = 1;
+        output->enqueue_marker_success = 1;
+        output->event3_is_valid = 1;
+        output->user_event1_is_valid = 1;
+        output->user_event2_is_valid = 1;
+        queue_t default_queue = get_default_queue();
+        ndrange_t ndrange1 = ndrange_1D(get_global_size(0) / 2);
+        clk_event_t event1;
+        int status1 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange1, 0, NULL, &event1,
+        ^{
+            const ulong gid = get_global_id(0);
+            output->values[gid * 2] = 1;
+        });
+        output->enqueue_kernel1_success = status1 == CLK_SUCCESS;
+        output->event1_is_valid = is_valid_event(event1);
+        ndrange_t ndrange2 = ndrange_1D(1, get_global_size(0) / 2, 1);
+        clk_event_t event2;
+        int status2 = enqueue_kernel(queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange2, 1, &event1, &event2,
+        ^{
+            const ulong gid = get_global_id(0);
+            output->values[(gid - 1) * 2 + 1] = 1;
+        });
+        output->enqueue_kernel2_success = status2 == CLK_SUCCESS;
+        output->event2_is_valid = is_valid_event(event2);
+        release_event(event1);
+        release_event(event2);
+    }
+    )";
+    }
+    else if (options.test == 2)
+    {
+        s << R"(
+    kernel void test(queue_t queue, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+        if (gid != 0)
+            return;
+        output->enqueue_marker_success = 1;
+        output->event3_is_valid = 1;
+        output->enqueue_kernel3_success = 1;
+        queue_t default_queue = get_default_queue();
+        clk_event_t user_event1 = create_user_event();
+        retain_event(user_event1);
+        output->user_event1_is_valid = is_valid_event(user_event1);
+        ndrange_t ndrange1 = ndrange_1D(get_global_size(0) / 2);
+        clk_event_t event1;
+        int status1 = enqueue_kernel(queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange1, 1, &user_event1, &event1,
+        ^{
+            const ulong gid = get_global_id(0);
+            output->values[gid * 2] = 1;
+        });
+        output->enqueue_kernel1_success = status1 == CLK_SUCCESS;
+        output->event1_is_valid = is_valid_event(event1);
+        release_event(user_event1);
+        clk_event_t user_event2 = create_user_event();
+        output->user_event2_is_valid = is_valid_event(user_event2);
+        clk_event_t events[2];
+        events[0] = user_event2;
+        events[1] = user_event1;
+        ndrange_t ndrange2 = ndrange_1D(1, get_global_size(0) / 2, get_local_size(0));
+        clk_event_t event2;
+        int status2 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange2, 2, events, &event2,
+        ^(local void *p0, local void *p1, local void *p2) {
+            const ulong gid = get_global_id(0);
+            const ulong lid = get_local_id(0);
+            local int2 *l0 = (local int2 *)p0;
+            local int *l1 = (local int *)p1;
+            local int *l2 = (local int *)p2;
+            l1[get_local_size(0) - lid - 1] = gid > 0 ? 1 : 0;
+            work_group_barrier(CLK_LOCAL_MEM_FENCE);
+            if (lid < 5) l0[lid] = (int2)(3, 4);
+            if (lid < 3) l2[lid] = 5;
+            work_group_barrier(CLK_LOCAL_MEM_FENCE);
+            output->values[(gid - 1) * 2 + 1] = min(l1[lid], min(l0[0].x, l2[0]));
+        }, sizeof(int2) * 5, sizeof(int) * get_local_size(0), sizeof(int) * 3);
+        output->enqueue_kernel2_success = status2 == CLK_SUCCESS;
+        output->event2_is_valid = is_valid_event(event2);
+        set_user_event_status(user_event1, CL_COMPLETE);
+        set_user_event_status(user_event2, CL_COMPLETE);
+        release_event(user_event1);
+        release_event(user_event2);
+        release_event(event1);
+        release_event(event2);
+    }
+    )";
+    }
+    else if (options.test == 3)
+    {
+        s << R"(
+    kernel void test(queue_t queue, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+        if (gid != 0)
+            return;
+        output->user_event2_is_valid = 1;
+        queue_t default_queue = get_default_queue();
+        ndrange_t ndrange1 = ndrange_1D(get_global_size(0) / 2);
+        clk_event_t event1;
+        int status1 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange1, 0, NULL, &event1,
+        ^{
+            const ulong gid = get_global_id(0);
+            output->values[gid * 2] = 20;
+        });
+        output->enqueue_kernel1_success = status1 == CLK_SUCCESS;
+        output->event1_is_valid = is_valid_event(event1);
+        ndrange_t ndrange2 = ndrange_1D(1, get_global_size(0) / 2, 1);
+        clk_event_t event2;
+        int status2 = enqueue_kernel(queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange2, 0, NULL, &event2,
+        ^{
+            const ulong gid = get_global_id(0);
+            output->values[(gid - 1) * 2 + 1] = 20;
+        });
+        output->enqueue_kernel2_success = status2 == CLK_SUCCESS;
+        output->event2_is_valid = is_valid_event(event2);
+        clk_event_t user_event1 = create_user_event();
+        output->user_event1_is_valid = is_valid_event(user_event1);
+        clk_event_t events[3];
+        events[0] = event2;
+        events[1] = user_event1;
+        events[2] = event1;
+        clk_event_t event3;
+        int status3 = enqueue_marker(queue, 3, events, &event3);
+        output->enqueue_marker_success = status3 == CLK_SUCCESS;
+        output->event3_is_valid = is_valid_event(event3);
+        int status4 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange_1D(get_global_size(0)), 1, &event3, NULL,
+        ^{
+            const ulong gid = get_global_id(0);
+            output->values[gid] /= 20;
+        });
+        output->enqueue_kernel3_success = status4 == CLK_SUCCESS;
+        set_user_event_status(user_event1, CL_COMPLETE);
+        release_event(user_event1);
+        release_event(event1);
+        release_event(event2);
+        release_event(event3);
+    }
+    )";
+    }
+    return s.str();
+std::string generate_source(test_options options) // Builds the OpenCL C++ kernel source for the variant selected by options.test.
+    std::stringstream s; // accumulates the generated source text
+    s << R"(
+    #include <opencl_memory>
+    #include <opencl_common>
+    #include <opencl_work_item>
+    #include <opencl_synchronization>
+    #include <opencl_device_queue>
+    using namespace cl;
+    )"; // preamble emitted for every variant
+    s << source_common; // defined outside this view; presumably declares output_type used by the kernels -- TODO confirm
+    if (options.test == 0) // variant 0: a single child kernel on the default device queue, no event dependencies
+    {
+        s << R"(
+    kernel void test(device_queue queue, global<output_type> *output)
+    {
+        const ulong gid = get_global_id(0);
+        if (gid != 0)
+            return;
+        output->enqueue_kernel2_success = 1;
+        output->enqueue_kernel3_success = 1;
+        output->enqueue_marker_success = 1;
+        output->event2_is_valid = 1;
+        output->event3_is_valid = 1;
+        output->user_event1_is_valid = 1;
+        output->user_event2_is_valid = 1;
+        device_queue default_queue = get_default_device_queue();
+        ndrange ndrange1(get_global_size(0));
+        event event1;
+        enqueue_status status1 = default_queue.enqueue_kernel(enqueue_policy::no_wait, 0, nullptr, &event1, ndrange1,
+        [](global<output_type> *output) {
+            const ulong gid = get_global_id(0);
+            output->values[gid] = 1;
+        }, output);
+        output->enqueue_kernel1_success = status1 == enqueue_status::success;
+        output->event1_is_valid = event1.is_valid();
+        event1.release();
+    }
+    )";
+    }
+    else if (options.test == 1) // variant 1: second child kernel (on the `queue` argument) waits on the first kernel's event
+    {
+        s << R"(
+    kernel void test(device_queue queue, global<output_type> *output)
+    {
+        const ulong gid = get_global_id(0);
+        if (gid != 0)
+            return;
+        output->enqueue_kernel3_success = 1;
+        output->enqueue_marker_success = 1;
+        output->event3_is_valid = 1;
+        output->user_event1_is_valid = 1;
+        output->user_event2_is_valid = 1;
+        device_queue default_queue = get_default_device_queue();
+        ndrange ndrange1(get_global_size(0) / 2);
+        event event1;
+        enqueue_status status1 = default_queue.enqueue_kernel(enqueue_policy::wait_work_group, 0, nullptr, &event1, ndrange1,
+        [](global<output_type> *output) {
+            const ulong gid = get_global_id(0);
+            output->values[gid * 2] = 1;
+        }, output);
+        output->enqueue_kernel1_success = status1 == enqueue_status::success;
+        output->event1_is_valid = event1.is_valid();
+        ndrange ndrange2(1, get_global_size(0) / 2, 1);
+        event event2;
+        enqueue_status status2 = queue.enqueue_kernel(enqueue_policy::wait_kernel, 1, &event1, &event2, ndrange2,
+        [](global<output_type> *output) {
+            const ulong gid = get_global_id(0);
+            output->values[(gid - 1) * 2 + 1] = 1;
+        }, output);
+        output->enqueue_kernel2_success = status2 == enqueue_status::success;
+        output->event2_is_valid = event2.is_valid();
+        event1.release();
+        event2.release();
+    }
+    )";
+    }
+    else if (options.test == 2) // variant 2: child kernels gated by user events; the second also takes three local-memory arguments
+    {
+        s << R"(
+    kernel void test(device_queue queue, global<output_type> *output)
+    {
+        const ulong gid = get_global_id(0);
+        if (gid != 0)
+            return;
+        output->enqueue_marker_success = 1;
+        output->event3_is_valid = 1;
+        output->enqueue_kernel3_success = 1;
+        device_queue default_queue = get_default_device_queue();
+        event user_event1 = make_user_event();
+        user_event1.retain();
+        output->user_event1_is_valid = user_event1.is_valid();
+        ndrange ndrange1(get_global_size(0) / 2);
+        event event1;
+        enqueue_status status1 = queue.enqueue_kernel(enqueue_policy::wait_kernel, 1, &user_event1, &event1, ndrange1,
+        [](global<output_type> *output){
+            const ulong gid = get_global_id(0);
+            output->values[gid * 2] = 1;
+        }, output);
+        output->enqueue_kernel1_success = status1 == enqueue_status::success;
+        output->event1_is_valid = event1.is_valid();
+        user_event1.release();
+        event user_event2 = make_user_event();
+        output->user_event2_is_valid = user_event2.is_valid();
+        event events[2];
+        events[0] = user_event2;
+        events[1] = user_event1;
+        ndrange ndrange2(1, get_global_size(0) / 2, get_local_size(0));
+        event event2;
+        enqueue_status status2 = default_queue.enqueue_kernel(enqueue_policy::no_wait, 2, events, &event2, ndrange2,
+        [](global<output_type> *output, local_ptr<int2[]> l0, local_ptr<int[]> l1, local_ptr<int[]> l2) {
+            const ulong gid = get_global_id(0);
+            const ulong lid = get_local_id(0);
+            l1[get_local_size(0) - lid - 1] = gid > 0 ? 1 : 0;
+            work_group_barrier(mem_fence::local);
+            if (lid < 5) l0[lid] = int2(3, 4);
+            if (lid < 3) l2[lid] = 5;
+            work_group_barrier(mem_fence::local);
+            output->values[(gid - 1) * 2 + 1] = min(l1[lid], min(l0[0].x, l2[0]));
+        }, output, local_ptr<int2[]>::size_type(5), local_ptr<int[]>::size_type(get_local_size(0)), local_ptr<int[]>::size_type(3));
+        output->enqueue_kernel2_success = status2 == enqueue_status::success;
+        output->event2_is_valid = event2.is_valid();
+        user_event1.set_status(event_status::complete);
+        user_event2.set_status(event_status::complete);
+        user_event1.release();
+        user_event2.release();
+        event1.release();
+        event2.release();
+    }
+    )";
+    }
+    else if (options.test == 3) // variant 3: two kernels store 20, a marker waits on both plus a user event, a third kernel divides by 20
+    {
+        s << R"(
+    kernel void test(device_queue queue, global<output_type> *output)
+    {
+        const ulong gid = get_global_id(0);
+        if (gid != 0)
+            return;
+        output->user_event2_is_valid = 1;
+        device_queue default_queue = get_default_device_queue();
+        ndrange ndrange1(get_global_size(0) / 2);
+        event event1;
+        enqueue_status status1 = default_queue.enqueue_kernel(enqueue_policy::wait_work_group, 0, nullptr, &event1, ndrange1,
+        [](global<output_type> *output) {
+            const ulong gid = get_global_id(0);
+            output->values[gid * 2] = 20;
+        }, output);
+        output->enqueue_kernel1_success = status1 == enqueue_status::success;
+        output->event1_is_valid = event1.is_valid();
+        ndrange ndrange2(1, get_global_size(0) / 2, 1);
+        event event2;
+        enqueue_status status2 = queue.enqueue_kernel(enqueue_policy::wait_kernel, 0, nullptr, &event2, ndrange2,
+        [](global<output_type> *output) {
+            const ulong gid = get_global_id(0);
+            output->values[(gid - 1) * 2 + 1] = 20;
+        }, output);
+        output->enqueue_kernel2_success = status2 == enqueue_status::success;
+        output->event2_is_valid = event2.is_valid();
+        event user_event1 = make_user_event();
+        output->user_event1_is_valid = user_event1.is_valid();
+        event events[3];
+        events[0] = event2;
+        events[1] = user_event1;
+        events[2] = event1;
+        event event3;
+        enqueue_status status3 = queue.enqueue_marker(3, events, &event3);
+        output->enqueue_marker_success = status3 == enqueue_status::success;
+        output->event3_is_valid = event3.is_valid();
+        enqueue_status status4 = default_queue.enqueue_kernel(enqueue_policy::no_wait, 1, &event3, nullptr, ndrange(get_global_size(0)),
+        [](global<output_type> *output) {
+            const ulong gid = get_global_id(0);
+            output->values[gid] /= 20;
+        }, output);
+        output->enqueue_kernel3_success = status4 == enqueue_status::success;
+        user_event1.set_status(event_status::complete);
+        user_event1.release();
+        event1.release();
+        event2.release();
+        event3.release();
+    }
+    )";
+    }
+    return s.str(); // for any other options.test value only the preamble and source_common are returned
+int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) // Runs one device-enqueue variant: build the kernel, launch it, read back output_type and validate it.
+    int error = CL_SUCCESS;
+    cl_program program;
+    cl_kernel kernel;
+    std::string kernel_name = "test"; // every kernel emitted by generate_source is named "test"
+    std::string source = generate_source(options);
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    return error; // compile-only mode stops here; NOTE(review): the #if opening this branch is not visible in this hunk
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name, "-cl-std=CL2.0", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    cl_uint max_queues; // decides below whether a second queue is created
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(cl_uint), &max_queues, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+    cl_uint max_events; // queried but not referenced again in the visible part of this function
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_EVENTS, sizeof(cl_uint), &max_events, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+    cl_command_queue device_queue1 = NULL;
+    cl_command_queue device_queue2 = NULL; // stays NULL unless max_queues > 1
+    cl_queue_properties queue_properties1[] =
+    {
+        0 // only the list terminator is visible; NOTE(review): CL_QUEUE_* entries may have been dropped from this hunk -- confirm against the full file
+    };
+    device_queue1 = clCreateCommandQueueWithProperties(context, device, queue_properties1, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateCommandQueueWithProperties")
+    if (max_queues > 1)
+    {
+        cl_queue_properties queue_properties2[] =
+        {
+            0 // same as queue_properties1: only the terminator is visible here
+        };
+        device_queue2 = clCreateCommandQueueWithProperties(context, device, queue_properties2, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateCommandQueueWithProperties")
+    }
+    cl_mem output_buffer; // device-side copy of output_type that the kernels fill in
+    output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    error = clSetKernelArg(kernel, 0, sizeof(cl_command_queue), device_queue2 != NULL ? &device_queue2 : &device_queue1); // kernel's `queue` argument: second queue when it exists, otherwise the first
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    const char pattern = 0; // one-byte pattern: zero-fills the whole output_type before the launch
+    error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type), 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer")
+    size_t max_work_group_size;
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group_size, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+    const size_t local_size = (std::min)((size_t)256, max_work_group_size); // at most 256 work-items per group
+    const size_t global_size = 10000 / local_size * local_size; // 10000 rounded down to a multiple of local_size
+    const size_t count = global_size; // number of output.values entries validated below
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+    output_type output;
+    error = clEnqueueReadBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(output_type),
+        static_cast<void *>(&output),
+        0, NULL, NULL
+    ); // blocking read (CL_TRUE): `output` is populated once this call returns successfully
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+    if (!output.enqueue_kernel1_success) // status flags below were written by work-item 0 of the parent kernel
+    {
+        RETURN_ON_ERROR_MSG(-1, "enqueue_kernel did not succeed")
+    }
+    if (!output.enqueue_kernel2_success)
+    {
+        RETURN_ON_ERROR_MSG(-1, "enqueue_kernel did not succeed")
+    }
+    if (!output.enqueue_kernel3_success)
+    {
+        RETURN_ON_ERROR_MSG(-1, "enqueue_kernel did not succeed")
+    }
+    if (!output.enqueue_marker_success)
+    {
+        RETURN_ON_ERROR_MSG(-1, "enqueue_marker did not succeed")
+    }
+    if (!output.event1_is_valid)
+    {
+        RETURN_ON_ERROR_MSG(-1, "event1 is not valid")
+    }
+    if (!output.event2_is_valid)
+    {
+        RETURN_ON_ERROR_MSG(-1, "event2 is not valid")
+    }
+    if (!output.event3_is_valid)
+    {
+        RETURN_ON_ERROR_MSG(-1, "event3 is not valid")
+    }
+    if (!output.user_event1_is_valid)
+    {
+        RETURN_ON_ERROR_MSG(-1, "user_event1 is not valid")
+    }
+    if (!output.user_event2_is_valid)
+    {
+        RETURN_ON_ERROR_MSG(-1, "user_event2 is not valid")
+    }
+    for (size_t i = 0; i < count; i++) // every variant is expected to leave 1 in each of the first `count` values
+    {
+        const cl_int result = output.values[i];
+        const cl_int expected = 1;
+        if (result != expected)
+        {
+            RETURN_ON_ERROR_MSG(-1,
+                "kernel did not return correct value. Expected: %s, got: %s",
+                format_value(expected).c_str(), format_value(result).c_str()
+            )
+        }
+    }
+    clReleaseMemObject(output_buffer); // NOTE(review): the RETURN_ON_* macros above presumably return early and skip this cleanup -- confirm their definitions
+    clReleaseCommandQueue(device_queue1);
+    if (device_queue2 != NULL)
+        clReleaseCommandQueue(device_queue2);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // test-case entry point for variant 0; its name line is not visible in this hunk, and num_elements is unused in the body
+    test_options options;
+    options.test = 0; // selects the options.test == 0 kernel in generate_source
+    return test(device, context, queue, options);
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // test-case entry point for variant 1; its name line is not visible in this hunk, and num_elements is unused in the body
+    test_options options;
+    options.test = 1; // selects the options.test == 1 kernel in generate_source
+    return test(device, context, queue, options);
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // test-case entry point for variant 2; its name line is not visible in this hunk, and num_elements is unused in the body
+    test_options options;
+    options.test = 2; // selects the options.test == 2 kernel in generate_source
+    return test(device, context, queue, options);
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // test-case entry point for variant 3; its name line is not visible in this hunk, and num_elements is unused in the body
+    test_options options;
+    options.test = 3; // selects the options.test == 3 kernel in generate_source
+    return test(device, context, queue, options);
+} // namespace
diff --git a/test_conformance/clcpp/funcs_test_utils.hpp b/test_conformance/clcpp/funcs_test_utils.hpp
new file mode 100644
index 0000000..e839231
--- /dev/null
+++ b/test_conformance/clcpp/funcs_test_utils.hpp
@@ -0,0 +1,72 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// This file contains helper classes and functions for testing various unary, binary
+// and ternary OpenCL functions (for example cl::abs(x) or cl::abs_diff(x, y)), 
+// as well as other helper functions/classes.
+#include "common.hpp"
+    last_error = test_unary_func(  /* macro body: the #define line is not visible in this hunk; TEST_CLASS is presumably its parameter */ \
+        device, context, queue, n_elems, TEST_CLASS \
+    );  \
+    CHECK_ERROR(last_error) \
+    error |= last_error; // fold this test's status into the caller's running `error`
+    last_error = test_binary_func(  /* macro body: the #define line is not visible in this hunk; TEST_CLASS is presumably its parameter */ \
+        device, context, queue, n_elems, TEST_CLASS \
+    );  \
+    CHECK_ERROR(last_error) \
+    error |= last_error; // fold this test's status into the caller's running `error`
+    last_error = test_ternary_func(  /* macro body: the #define line is not visible in this hunk; TEST_CLASS is presumably its parameter */ \
+        device, context, queue, n_elems, TEST_CLASS \
+    );  \
+    CHECK_ERROR(last_error) \
+    error |= last_error; // fold this test's status into the caller's running `error`
+#include "utils_test/compare.hpp"
+#include "utils_test/generate_inputs.hpp"
+// HOWTO:
+// unary_func, binary_func, ternary_func - base classes wrapping OpenCL functions that
+// you want to test.
+// To create a wrapper class for given function, you need to create a class derived from correct
+// base class (unary_func, binary_func, ternary_func), and define:
+// * std::string str() method which should return class name in OpenCL ("abs", "abs_diff"),
+// * operator(x), operator(x, y) or operator(x,y,z) depending on arity of the function you wish
+// to test, method should work exactly as the tested function works in OpenCL
+// * if needed, you can overload the min1, max1, min2, max2, min3, max3 methods, which return the min
+// and max values that can be generated for the given input (function argument) [required for vec
+// arguments],
+// * if you want to use vector arguments (for example: cl_int2, cl_ulong16), you should look at
+// how int_func_clamp<> is implemented in integer_funcs/numeric_funcs.hpp.
+// To see how to use the class you've just created, see AUTO_TEST_CASE(test_int_numeric_funcs)
+// in integer_funcs/numeric_funcs.hpp.
+#include "utils_test/unary.hpp"
+#include "utils_test/binary.hpp"
+#include "utils_test/ternary.hpp"
diff --git a/test_conformance/clcpp/geometric_funcs/CMakeLists.txt b/test_conformance/clcpp/geometric_funcs/CMakeLists.txt
new file mode 100644
index 0000000..25d05ed
--- /dev/null
+++ b/test_conformance/clcpp/geometric_funcs/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/geometric_funcs/fast_geometric_funcs.hpp b/test_conformance/clcpp/geometric_funcs/fast_geometric_funcs.hpp
new file mode 100644
index 0000000..c179728
--- /dev/null
+++ b/test_conformance/clcpp/geometric_funcs/fast_geometric_funcs.hpp
@@ -0,0 +1,229 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+#include <type_traits>
+// float fast_distance(float4 p0, float4 p1); -- host reference implementation plus input ranges and tolerances used to test it.
+struct geometric_func_fast_distance : public binary_func<cl_float4, cl_float4, cl_float>
+    std::string str() // name of the OpenCL function under test
+    {
+        return "fast_distance";
+    }
+    std::string headers() // #include line this test supplies; how it is consumed is outside this view
+    {
+        return "#include <opencl_geometric>\n";
+    }
+    cl_float operator()(const cl_float4& p0, const cl_float4& p1) // reference result: sqrt of the summed squared component differences
+    {
+        cl_double r = 0.0f; // accumulated in double, narrowed to cl_float on return
+        cl_double t;
+        for(size_t i = 0; i < 4; i++)
+        {
+            t = static_cast<cl_double>(p0.s[i]) - static_cast<cl_double>(p1.s[i]);
+            r += t * t;
+        }
+        return std::sqrt(r);
+    }
+    cl_float4 min1() // lower limit for the first argument
+    {
+        return detail::def_limit<cl_float4>(-512.0f);
+    }
+    cl_float4 max1() // upper limit for the first argument
+    {
+        return detail::def_limit<cl_float4>(512.0f);
+    }
+    cl_float4 min2() // lower limit for the second argument
+    {
+        return detail::def_limit<cl_float4>(-512.0f);
+    }
+    cl_float4 max2() // upper limit for the second argument
+    {
+        return detail::def_limit<cl_float4>(512.0f);
+    }
+    cl_double delta(const cl_float4& p0, const cl_float4& p1, const cl_float& expected) // tolerance: 1% of the expected value
+    {
+        (void) p0; (void) p1; // inputs are intentionally unused
+        return 0.01f * expected;
+    }
+    float ulp() // ULP budget; the terms below sum to 8199.5
+    {
+        return
+            8192.0f + // error in sqrt
+            (1.5f * 4.0f) + // cumulative error for multiplications
+            (0.5f * 3.0f);  // cumulative error for additions
+    }
+// float fast_length(float4 p); -- host reference implementation plus input range and tolerances used to test it.
+struct geometric_func_fast_length : public unary_func<cl_float4,cl_float>
+    std::string str() // name of the OpenCL function under test
+    {
+        return "fast_length";
+    }
+    std::string headers() // #include line this test supplies; how it is consumed is outside this view
+    {
+        return "#include <opencl_geometric>\n";
+    }
+    cl_float operator()(const cl_float4& p) // reference result: sqrt of the sum of squared components
+    {
+        cl_double r = 0.0f; // accumulated in double, narrowed to cl_float on return
+        for(size_t i = 0; i < 4; i++)
+        {
+            r += static_cast<cl_double>(p.s[i]) * static_cast<cl_double>(p.s[i]);
+        }
+        return std::sqrt(r);
+    }
+    cl_float4 min1() // lower limit for the argument
+    {
+        return detail::def_limit<cl_float4>(-512.0f);
+    }
+    cl_float4 max1() // upper limit for the argument
+    {
+        return detail::def_limit<cl_float4>(512.0f);
+    }
+    cl_double delta(const cl_float4& p, const cl_float& expected) // tolerance: 1% of the expected value
+    {
+        (void) p; // input is intentionally unused
+        return 0.01f * expected;
+    }
+    float ulp() // ULP budget; the terms below sum to 8193.75
+    {
+        return
+            8192.0f + // error in sqrt
+            0.5f * // effect on e of taking sqrt( x + e )
+                ((0.5f * 4.0f) + // cumulative error for multiplications
+                (0.5f * 3.0f));  // cumulative error for additions
+    }
+// float4 fast_normalize(float4 p); -- host reference implementation plus input range, special cases and tolerances used to test it.
+struct geometric_func_fast_normalize : public unary_func<cl_float4,cl_float4>
+    std::string str() // name of the OpenCL function under test
+    {
+        return "fast_normalize";
+    }
+    std::string headers() // #include line this test supplies; how it is consumed is outside this view
+    {
+        return "#include <opencl_geometric>\n";
+    }
+    cl_float4 operator()(const cl_float4& p) // reference result: p divided by its Euclidean length; the zero vector maps to zero
+    {
+        cl_double t = 0.0f; // sum of squares, later replaced by its square root
+        cl_float4 r;
+        for(size_t i = 0; i < 4; i++)
+        {
+            t += static_cast<cl_double>(p.s[i]) * static_cast<cl_double>(p.s[i]);
+        }
+        if(t == 0.0f) // zero-length input: return all zeros rather than dividing by zero
+        {
+            for(size_t i = 0; i < 4; i++)
+            {
+                r.s[i] = 0.0f;
+            }
+            return r;
+        }
+        t = std::sqrt(t);
+        for(size_t i = 0; i < 4; i++)
+        {
+            r.s[i] = static_cast<cl_double>(p.s[i]) / t; // division done in double, narrowed on store
+        }
+        return r;
+    }
+    cl_float4 min1() // lower limit for the argument
+    {
+        return detail::def_limit<cl_float4>(-512.0f);
+    }
+    cl_float4 max1() // upper limit for the argument
+    {
+        return detail::def_limit<cl_float4>(512.0f);
+    }
+    std::vector<cl_float4> in_special_cases() // extra input: the zero vector (presumably added to the generated inputs -- TODO confirm in the harness)
+    {
+        return {
+            {0.0f, 0.0f, 0.0f, 0.0f}
+        };
+    }
+    cl_double4 delta(const cl_float4& p, const cl_float4& expected) // tolerance: 0.01 combined with expected via detail::multiply (presumably component-wise)
+    {
+        (void) p; // input is intentionally unused
+        auto e = detail::make_value<cl_double4>(0.01f);
+        return detail::multiply<cl_double4>(e, expected);
+    }
+    float ulp() // ULP budget; the terms below sum to 8196
+    {
+        return
+            8192.5f + // error in rsqrt + error in multiply
+            (0.5f * 4.0f) + // cumulative error for multiplications
+            (0.5f * 3.0f);  // cumulative error for additions
+    }
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // test-case entry point for the fast_* geometric functions; its name line is not visible in this hunk
+    int error = CL_SUCCESS; // accumulated by the TEST_*_FUNC_MACRO invocations below
+    int last_error = CL_SUCCESS; // status of the most recent invocation
+    // float fast_distance(float4 p0, float4 p1)
+    TEST_BINARY_FUNC_MACRO((geometric_func_fast_distance()))
+    // float fast_length(float4 p)
+    TEST_UNARY_FUNC_MACRO((geometric_func_fast_length()))
+    // float4 fast_normalize(float4 p)
+    TEST_UNARY_FUNC_MACRO((geometric_func_fast_normalize()))
+    if(error != CL_SUCCESS) // any accumulated failure is reported as -1
+    {
+        return -1;
+    }
+    return error;
diff --git a/test_conformance/clcpp/geometric_funcs/geometric_funcs.hpp b/test_conformance/clcpp/geometric_funcs/geometric_funcs.hpp
new file mode 100644
index 0000000..561f9e9
--- /dev/null
+++ b/test_conformance/clcpp/geometric_funcs/geometric_funcs.hpp
@@ -0,0 +1,389 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+#include <type_traits>
+// float4 cross(float4 p0, float4 p1) -- host reference implementation plus input ranges and a device-dependent relative tolerance.
+struct geometric_func_cross : public binary_func<cl_float4, cl_float4, cl_float4>
+    geometric_func_cross(cl_device_id device) // picks m_delta from the device's default rounding mode
+    {
+        // On an embedded device w/ round-to-zero, 3 ulps is the worst-case tolerance for cross product
+        this->m_delta = 3.0f * CL_FLT_EPSILON;
+        // RTZ devices accrue approximately double the amount of error per operation.  Allow for that.
+        if(get_default_rounding_mode(device) == CL_FP_ROUND_TO_ZERO)
+        {
+            this->m_delta *= 2.0f;
+        }
+    }
+    std::string str() // name of the OpenCL function under test
+    {
+        return "cross";
+    }
+    std::string headers() // #include line this test supplies; how it is consumed is outside this view
+    {
+        return "#include <opencl_geometric>\n";
+    }
+    cl_float4 operator()(const cl_float4& p0, const cl_float4& p1) // reference result: 3-component cross product, fourth component set to 0
+    {
+        cl_float4 r;
+        r.s[0] = (p0.s[1] * p1.s[2]) - (p0.s[2] * p1.s[1]);
+        r.s[1] = (p0.s[2] * p1.s[0]) - (p0.s[0] * p1.s[2]);
+        r.s[2] = (p0.s[0] * p1.s[1]) - (p0.s[1] * p1.s[0]);
+        r.s[3] = 0.0f;
+        return r;
+    }
+    cl_float4 max1() // upper limit for the first argument
+    {
+        return detail::def_limit<cl_float4>(1000.0f);
+    }
+    cl_float4 max2() // upper limit for the second argument
+    {
+        return detail::def_limit<cl_float4>(1000.0f);
+    }
+    cl_float4 min1() // lower limit for the first argument
+    {
+        return detail::def_limit<cl_float4>(-1000.0f);
+    }
+    cl_float4 min2() // lower limit for the second argument
+    {
+        return detail::def_limit<cl_float4>(-1000.0f);
+    }
+    bool use_ulp() // false: presumably makes the harness compare with delta() instead of ulp() -- TODO confirm
+    {
+        return false;
+    }
+    cl_double4 delta(const cl_float4& p0, const cl_float4& p1, const cl_float4& expected) // tolerance: m_delta combined with expected via detail::multiply (presumably component-wise)
+    {
+        (void) p0; (void) p1; // inputs are intentionally unused
+        auto e = detail::make_value<cl_double4>(m_delta);
+        return detail::multiply<cl_double4>(e, expected);
+    }
+    cl_double m_delta; // relative tolerance factor set in the constructor
+// float dot(float4 p0, float4 p1); -- host reference implementation plus input ranges and a relative tolerance.
+struct geometric_func_dot : public binary_func<cl_float4, cl_float4, cl_float>
+    std::string str() // name of the OpenCL function under test
+    {
+        return "dot";
+    }
+    std::string headers() // #include line this test supplies; how it is consumed is outside this view
+    {
+        return "#include <opencl_geometric>\n";
+    }
+    cl_float operator()(const cl_float4& p0, const cl_float4& p1) // reference result: sum of the four component products, computed in cl_float
+    {
+        cl_float r;
+        r = p0.s[0] * p1.s[0];
+        r += p0.s[1] * p1.s[1];
+        r += p0.s[2] * p1.s[2];
+        r += p0.s[3] * p1.s[3];
+        return r;
+    }
+    cl_float4 max1() // upper limit for the first argument
+    {
+        return detail::def_limit<cl_float4>(1000.0f);
+    }
+    cl_float4 max2() // upper limit for the second argument
+    {
+        return detail::def_limit<cl_float4>(1000.0f);
+    }
+    cl_float4 min1() // lower limit for the first argument
+    {
+        return detail::def_limit<cl_float4>(-1000.0f);
+    }
+    cl_float4 min2() // lower limit for the second argument
+    {
+        return detail::def_limit<cl_float4>(-1000.0f);
+    }
+    bool use_ulp() // false: presumably makes the harness compare with delta() instead of ulp() -- TODO confirm
+    {
+        return false;
+    }
+    cl_double delta(const cl_float4& p0, const cl_float4& p1, cl_float expected) // tolerance: expected * 7 * CL_FLT_EPSILON
+    {
+        (void) p0; (void) p1; // inputs are intentionally unused
+        return expected * ((4.0f + (4.0f - 1.0f)) * CL_FLT_EPSILON); // NOTE(review): negative when expected is negative -- confirm the comparator uses the magnitude
+    }
+// float distance(float4 p0, float4 p1); -- host reference implementation plus input ranges and ULP budget used to test it.
+struct geometric_func_distance : public binary_func<cl_float4, cl_float4, cl_float>
+    std::string str() // name of the OpenCL function under test
+    {
+        return "distance";
+    }
+    std::string headers() // #include line this test supplies; how it is consumed is outside this view
+    {
+        return "#include <opencl_geometric>\n";
+    }
+    cl_float operator()(const cl_float4& p0, const cl_float4& p1) // reference result: sqrt of the summed squared component differences
+    {
+        cl_double r = 0.0f; // accumulated in double, narrowed to cl_float on return
+        cl_double t;
+        for(size_t i = 0; i < 4; i++)
+        {
+            t = static_cast<cl_double>(p0.s[i]) - static_cast<cl_double>(p1.s[i]);
+            r += t * t;
+        }
+        return std::sqrt(r);
+    }
+    cl_float4 max1() // upper limit for the first argument
+    {
+        return detail::def_limit<cl_float4>(1000.0f);
+    }
+    cl_float4 max2() // upper limit for the second argument
+    {
+        return detail::def_limit<cl_float4>(1000.0f);
+    }
+    cl_float4 min1() // lower limit for the first argument
+    {
+        return detail::def_limit<cl_float4>(-1000.0f);
+    }
+    cl_float4 min2() // lower limit for the second argument
+    {
+        return detail::def_limit<cl_float4>(-1000.0f);
+    }
+    float ulp() // ULP budget; the terms below sum to 10.5
+    {
+        return
+            3.0f + // error in sqrt
+            (1.5f * 4.0f) + // cumulative error for multiplications
+            (0.5f * 3.0f);  // cumulative error for additions
+    }
+// float length(float4 p);
+struct geometric_func_length : public unary_func<cl_float4,cl_float>
+    std::string str()
+    {
+        return "length";
+    }
+    std::string headers()
+    {
+        return "#include <opencl_geometric>\n";
+    }
+    // Host reference: Euclidean length of p over all four lanes. The sum of
+    // squares is kept in double precision; the square root is narrowed to
+    // cl_float on return.
+    cl_float operator()(const cl_float4& p)
+    {
+        cl_double sum_of_squares = 0.0f;
+        for(size_t lane = 0; lane < 4; ++lane)
+        {
+            const cl_double component = static_cast<cl_double>(p.s[lane]);
+            sum_of_squares += component * component;
+        }
+        return std::sqrt(sum_of_squares);
+    }
+    // max1 limit: the vector detail::def_limit<cl_float4> builds from 1000.0f
+    // (presumably the upper bound for generated operands - confirm).
+    cl_float4 max1()
+    {
+        const cl_float upper = 1000.0f;
+        return detail::def_limit<cl_float4>(upper);
+    }
+    // min1 limit: the vector detail::def_limit<cl_float4> builds from -1000.0f
+    // (presumably the lower bound for generated operands - confirm).
+    cl_float4 min1()
+    {
+        const cl_float lower = -1000.0f;
+        return detail::def_limit<cl_float4>(lower);
+    }
+    float ulp()
+    {
+        return
+            3.0f + // error in sqrt
+            0.5f * // effect on e of taking sqrt( x + e )
+                ((0.5f * 4.0f) + // cumulative error for multiplications
+                (0.5f * 3.0f));  // cumulative error for additions
+    }
+// float4 normalize(float4 p);
+struct geometric_func_normalize : public unary_func<cl_float4,cl_float4>
+    // Name of the function this functor stands for: "normalize".
+    std::string str()
+    {
+        const std::string function_name = "normalize";
+        return function_name;
+    }
+    // Include directive (newline-terminated) that goes with this function.
+    std::string headers()
+    {
+        const std::string include_line = "#include <opencl_geometric>\n";
+        return include_line;
+    }
+    // Host reference for normalize(p). Cases are checked in this order:
+    //   1. any NaN lane      -> every result lane is that NaN;
+    //   2. any infinite lane -> infinite lanes become +/-1, the other lanes become
+    //                           0.0 * value, and that adjusted vector is normalized;
+    //   3. all lanes zero    -> p itself is returned, so the sign of each zero
+    //                           lane is preserved;
+    //   4. otherwise         -> each lane divided by the length, with the length
+    //                           computed in double precision.
+    cl_float4 operator()(const cl_float4& p)
+    {
+        cl_double t = 0.0f;
+        cl_float4 r;
+        // normalize( v ) returns a vector full of NaNs if any element is a NaN.
+        for(size_t i = 0; i < 4; i++)
+        {
+            if((std::isnan)(p.s[i]))
+            {
+                for(size_t j = 0; j < 4; j++)
+                {
+                    r.s[j] = p.s[i];
+                }
+                return r;
+            }
+        }
+        // normalize( v ) for which any element in v is infinite shall proceed as
+        // if the elements in v were replaced as follows:
+        // for( i = 0; i < sizeof(v) / sizeof(v[0] ); i++ )
+        //     v[i] = isinf(v[i]) ? copysign(1.0, v[i]) : 0.0 * v [i];
+        for(size_t i = 0; i < 4; i++)
+        {
+            if((std::isinf)(p.s[i]))
+            {
+                for(size_t j = 0; j < 4; j++)
+                {
+                    r.s[j] = (std::isinf)(p.s[j]) ? (std::copysign)(1.0, p.s[j]) : 0.0 * p.s[j];
+                }
+                // The adjusted vector holds only +/-1 and +/-0, so this call
+                // cannot reach the NaN or infinity branches again.
+                r = (*this)(r);
+                return r;
+            }
+        }
+        for(size_t i = 0; i < 4; i++)
+        {
+            t += static_cast<cl_double>(p.s[i]) * static_cast<cl_double>(p.s[i]);
+        }
+        // normalize( v ) returns v if all elements of v are zero.
+        // Every lane is finite here and the squares are formed in double, so the
+        // sum is zero only when every lane is +0 or -0. Returning p (instead of a
+        // vector of +0.0f) keeps the sign of each zero lane.
+        if(t == 0.0f)
+        {
+            return p;
+        }
+        t = std::sqrt(t);
+        for(size_t i = 0; i < 4; i++)
+        {
+            r.s[i] = static_cast<cl_double>(p.s[i]) / t;
+        }
+        return r;
+    }
+    // max1 limit: the vector detail::def_limit<cl_float4> builds from 1000.0f
+    // (presumably the upper bound for generated operands - confirm).
+    cl_float4 max1()
+    {
+        const cl_float upper = 1000.0f;
+        return detail::def_limit<cl_float4>(upper);
+    }
+    // min1 limit: the vector detail::def_limit<cl_float4> builds from -1000.0f
+    // (presumably the lower bound for generated operands - confirm).
+    cl_float4 min1()
+    {
+        const cl_float lower = -1000.0f;
+        return detail::def_limit<cl_float4>(lower);
+    }
+    // Fixed input vectors for normalize: all zeros, one infinite lane, all lanes
+    // infinite, a mix of infinity and NaN, and an ordinary finite vector.
+    std::vector<cl_float4> in_special_cases()
+    {
+        const float pos_inf = std::numeric_limits<float>::infinity();
+        const float quiet_nan = std::numeric_limits<float>::quiet_NaN();
+        return {
+            {0.0f, 0.0f, 0.0f, 0.0f},
+            {pos_inf, 0.0f, 0.0f, 0.0f},
+            {pos_inf, pos_inf, pos_inf, pos_inf},
+            {pos_inf, 1.0f, 0.0f, quiet_nan},
+            {-1.0f, -1.0f, 0.0f,-300.0f}
+        };
+    }
+    float ulp()
+    {
+        return
+            2.5f + // error in rsqrt + error in multiply
+            (0.5f * 4.0f) + // cumulative error for multiplications
+            (0.5f * 3.0f);  // cumulative error for additions
+    }
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // test-case body: passes each geometric functor to the TEST_*_FUNC_MACRO helpers
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS; // not referenced directly below; presumably consumed by the TEST_*_FUNC_MACRO expansions - confirm
+    // float4 cross(float4 p0, float4 p1)
+    TEST_BINARY_FUNC_MACRO((geometric_func_cross(device))) // the only functor here that is constructed from the device
+    // float dot(float4 p0, float4 p1)
+    TEST_BINARY_FUNC_MACRO((geometric_func_dot()))
+    // float distance(float4 p0, float4 p1)
+    TEST_BINARY_FUNC_MACRO((geometric_func_distance()))
+    // float length(float4 p)
+    TEST_UNARY_FUNC_MACRO((geometric_func_length()))
+    // float4 normalize(float4 p)
+    TEST_UNARY_FUNC_MACRO((geometric_func_normalize()))
+    if(error != CL_SUCCESS) // every non-success code is reported to the caller as -1
+    {
+        return -1;
+    }
+    return error; // only reached when error == CL_SUCCESS
diff --git a/test_conformance/clcpp/geometric_funcs/main.cpp b/test_conformance/clcpp/geometric_funcs/main.cpp
new file mode 100644
index 0000000..ee3a51b
--- /dev/null
+++ b/test_conformance/clcpp/geometric_funcs/main.cpp
@@ -0,0 +1,44 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <limits>
+#include "../common.hpp"
+#include "geometric_funcs.hpp"
+#include "fast_geometric_funcs.hpp"
+int main(int argc, const char *argv[]) // entry point: verifies the host float types, then hands the registered tests to runTestHarness
+    // Check if cl_float (float) and cl_double (double) fulfill the requirements of
+    // IEC 559 (IEEE 754) standard. This is required for the tests to run correctly.
+    if(!std::numeric_limits<cl_float>::is_iec559)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "cl_float (float) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
+            "Tests won't run correctly."
+        );
+    }
+    if(!std::numeric_limits<cl_double>::is_iec559)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "cl_double (double) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
+            "Tests won't run correctly."
+        );
+    }
+    auto& tests = autotest::test_suite::global_test_suite().test_defs; // test definitions held by the global autotest suite
+    return runTestHarness(argc, argv, tests.size(),, false, false, 0); // NOTE(review): the argument between the two consecutive commas is missing on this line - confirm against the original file
diff --git a/test_conformance/clcpp/images/CMakeLists.txt b/test_conformance/clcpp/images/CMakeLists.txt
new file mode 100644
index 0000000..3c92ecd
--- /dev/null
+++ b/test_conformance/clcpp/images/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/images/common.hpp b/test_conformance/clcpp/images/common.hpp
new file mode 100644
index 0000000..957d266
--- /dev/null
+++ b/test_conformance/clcpp/images/common.hpp
@@ -0,0 +1,195 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+#include "../harness/imageHelpers.h"
+namespace detail
+template<cl_channel_type channel_type>
+struct channel_info;
+struct channel_info<CL_SIGNED_INT8> // traits for the CL_SIGNED_INT8 channel data type
+    typedef cl_char channel_type; // host type of one generated channel value
+    typedef cl_int4 element_type; // vector type of one pixel as read by the kernel
+    static std::string function_suffix() { return "i"; } // suffix appended to "read_image" in generated OpenCL C source
+    channel_type channel_min() { return (std::numeric_limits<channel_type>::min)(); } // lower bound for generated channel values
+    channel_type channel_max() { return (std::numeric_limits<channel_type>::max)(); } // upper bound for generated channel values
+struct channel_info<CL_SIGNED_INT16> // traits for the CL_SIGNED_INT16 channel data type
+    typedef cl_short channel_type; // host type of one generated channel value
+    typedef cl_int4 element_type; // vector type of one pixel as read by the kernel
+    static std::string function_suffix() { return "i"; } // suffix appended to "read_image" in generated OpenCL C source
+    channel_type channel_min() { return (std::numeric_limits<channel_type>::min)(); } // lower bound for generated channel values
+    channel_type channel_max() { return (std::numeric_limits<channel_type>::max)(); } // upper bound for generated channel values
+struct channel_info<CL_SIGNED_INT32> // traits for the CL_SIGNED_INT32 channel data type
+    typedef cl_int channel_type; // host type of one generated channel value
+    typedef cl_int4 element_type; // vector type of one pixel as read by the kernel
+    static std::string function_suffix() { return "i"; } // suffix appended to "read_image" in generated OpenCL C source
+    channel_type channel_min() { return (std::numeric_limits<channel_type>::min)(); } // lower bound for generated channel values
+    channel_type channel_max() { return (std::numeric_limits<channel_type>::max)(); } // upper bound for generated channel values
+struct channel_info<CL_UNSIGNED_INT8> // traits for the CL_UNSIGNED_INT8 channel data type
+    typedef cl_uchar channel_type; // host type of one generated channel value
+    typedef cl_uint4 element_type; // vector type of one pixel as read by the kernel
+    static std::string function_suffix() { return "ui"; } // suffix appended to "read_image" in generated OpenCL C source
+    channel_type channel_min() { return (std::numeric_limits<channel_type>::min)(); } // lower bound for generated channel values
+    channel_type channel_max() { return (std::numeric_limits<channel_type>::max)(); } // upper bound for generated channel values
+struct channel_info<CL_UNSIGNED_INT16> // traits for the CL_UNSIGNED_INT16 channel data type
+    typedef cl_ushort channel_type; // host type of one generated channel value
+    typedef cl_uint4 element_type; // vector type of one pixel as read by the kernel
+    static std::string function_suffix() { return "ui"; } // suffix appended to "read_image" in generated OpenCL C source
+    channel_type channel_min() { return (std::numeric_limits<channel_type>::min)(); } // lower bound for generated channel values
+    channel_type channel_max() { return (std::numeric_limits<channel_type>::max)(); } // upper bound for generated channel values
+struct channel_info<CL_UNSIGNED_INT32> // traits for the CL_UNSIGNED_INT32 channel data type
+    typedef cl_uint channel_type; // host type of one generated channel value
+    typedef cl_uint4 element_type; // vector type of one pixel as read by the kernel
+    static std::string function_suffix() { return "ui"; } // suffix appended to "read_image" in generated OpenCL C source
+    channel_type channel_min() { return (std::numeric_limits<channel_type>::min)(); } // lower bound for generated channel values
+    channel_type channel_max() { return (std::numeric_limits<channel_type>::max)(); } // upper bound for generated channel values
+struct channel_info<CL_FLOAT> // traits for the CL_FLOAT channel data type
+    typedef cl_float channel_type; // host type of one generated channel value
+    typedef cl_float4 element_type; // vector type of one pixel as read by the kernel
+    static std::string function_suffix() { return "f"; } // suffix appended to "read_image" in generated OpenCL C source
+    channel_type channel_min() { return -1e-3f; } // NOTE(review): -1e-3f is not the mirror of channel_max (+1e+3f); confirm whether -1e+3f was intended
+    channel_type channel_max() { return +1e+3f; } // upper bound for generated channel values
+template<cl_mem_object_type image_type>
+struct image_info;
+struct image_info<CL_MEM_OBJECT_IMAGE1D> // traits for 1D images
+    static std::string image_type_name() { return "image1d"; } // image type name spliced into generated kernel source
+    static std::string coord_accessor() { return "x"; } // swizzle applied to the int4 coordinate in generated kernel source
+struct image_info<CL_MEM_OBJECT_IMAGE2D> // traits for 2D images
+    static std::string image_type_name() { return "image2d"; } // image type name spliced into generated kernel source
+    static std::string coord_accessor() { return "xy"; } // swizzle applied to the int4 coordinate in generated kernel source
+struct image_info<CL_MEM_OBJECT_IMAGE3D> // traits for 3D images
+    static std::string image_type_name() { return "image3d"; } // image type name spliced into generated kernel source
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    static std::string coord_accessor() { return "xyzw"; } // swizzle used when DEVELOPMENT and USE_OPENCLC_KERNELS are both defined
+    static std::string coord_accessor() { return "xyz"; } // NOTE(review): second definition with no visible #else/#endif; presumably the variant for all other builds - confirm
+} // namespace
+// Common base of the image tests: combines the channel traits selected by
+// ChannelType with the image traits selected by ImageType. Adds no members.
+template<cl_mem_object_type ImageType, cl_channel_type ChannelType>
+struct image_test_base
+    : detail::channel_info<ChannelType>
+    , detail::image_info<ImageType>
+{
+};
+// Create image_descriptor (used by harness/imageHelpers functions)
+image_descriptor create_image_descriptor(cl_image_desc &image_desc, cl_image_format *image_format) // copies the fields of image_desc into a new image_descriptor
+    image_descriptor image_info;
+    image_info.width = image_desc.image_width;
+    image_info.height = image_desc.image_height;
+    image_info.depth = image_desc.image_depth;
+    image_info.arraySize = image_desc.image_array_size;
+    image_info.rowPitch = image_desc.image_row_pitch;
+    image_info.slicePitch = image_desc.image_slice_pitch;
+    image_info.format = image_format; // the pointer is stored, the format is not copied: *image_format must outlive the descriptor
+    image_info.buffer = image_desc.mem_object;
+    image_info.type = image_desc.image_type;
+    image_info.num_mip_levels = image_desc.num_mip_levels;
+    return image_info; // returned by value
+const std::vector<cl_channel_order> get_channel_orders(cl_device_id device) // channel orders the image tests loop over; device is not used
+    // According to "Minimum List of Supported Image Formats" of OpenCL specification:
+    return { CL_R, CL_RG, CL_RGBA }; // the same fixed list for every device
+bool is_test_supported(cl_device_id device) // true unless checkForImageSupport reports CL_IMAGE_FORMAT_NOT_SUPPORTED for device
+    // Check for image support
+    if (checkForImageSupport(device) == CL_IMAGE_FORMAT_NOT_SUPPORTED)
+    {
+        log_info("SKIPPED: Device does not support images. Skipping test.\n"); // the skip is logged here; callers only see the false result
+        return false;
+    }
+    return true; // any other result of checkForImageSupport counts as supported
+// Checks if x is equal to y.
+template<class type>
+inline bool are_equal(const type& x, // lane-by-lane equality of two vector values of the same type
+                      const type& y)
+    for(size_t i = 0; i < vector_size<type>::value; i++) // one iteration per lane, lane count taken from vector_size<type>
+    {
+        if(!(x.s[i] == y.s[i])) // the first lane that does not compare equal decides the result
+        {
+            return false;
+        }
+    }
+    return true; // every lane compared equal
diff --git a/test_conformance/clcpp/images/main.cpp b/test_conformance/clcpp/images/main.cpp
new file mode 100644
index 0000000..8c41bb6
--- /dev/null
+++ b/test_conformance/clcpp/images/main.cpp
@@ -0,0 +1,30 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "test_read.hpp"
+#include "test_sample.hpp"
+#include "test_write.hpp"
+// FIXME: To use certain functions in test_common/harness/imageHelpers.h
+// (for example, generate_random_image_data()), the tests are required to declare
+// the following variable (hangover from code specific to Apple's implementation):
+int main(int argc, const char *argv[]) // entry point: hands the registered image tests to runTestHarness
+    auto& tests = autotest::test_suite::global_test_suite().test_defs; // test definitions held by the global autotest suite
+    return runTestHarness(argc, argv, tests.size(),, false, false, 0); // NOTE(review): the argument between the two consecutive commas is missing on this line - confirm against the original file
diff --git a/test_conformance/clcpp/images/test_read.hpp b/test_conformance/clcpp/images/test_read.hpp
new file mode 100644
index 0000000..3bc7b5e
--- /dev/null
+++ b/test_conformance/clcpp/images/test_read.hpp
@@ -0,0 +1,307 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <vector>
+#include "common.hpp"
+namespace test_images_read {
+template<cl_mem_object_type ImageType, cl_channel_type ChannelType>
+struct image_test : image_test_base<ImageType, ChannelType>
+    cl_channel_order channel_order;
+    // Stores the channel order that run() puts into the image format.
+    image_test(cl_channel_order channel_order)
+        : channel_order(channel_order)
+    {
+    }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    // Builds the OpenCL C source of kernel "test": each work-item reads the image
+    // at coords[gid] with read_image<suffix> and stores the pixel in output[gid].
+    // Element type, image type, function suffix and coordinate swizzle come from
+    // the traits this test inherits.
+    std::string generate_source()
+    {
+        const std::string element_name = type_name<typename image_test::element_type>();
+        const std::string image_name = image_test::image_type_name();
+        const std::string read_suffix = image_test::function_suffix();
+        const std::string swizzle = image_test::coord_accessor();
+        std::stringstream kernel_src;
+        kernel_src << R"(
+        typedef )" << element_name << R"( element_type;
+        kernel void test(
+            read_only )" << image_name << R"(_t img,
+            const global int4 *coords,
+            global element_type *output
+        ) {
+            const ulong gid = get_global_linear_id();
+            output[gid] = read_image)" << read_suffix <<
+                "(img, coords[gid]." << swizzle << R"();
+        }
+        )";
+        return kernel_src.str();
+    }
+    std::string generate_source() // builds the OpenCL C++ source of kernel "test" (includes, element typedef, kernel)
+    {
+        std::stringstream s; // the source text is accumulated here
+        s << R"(
+        #include <opencl_memory>
+        #include <opencl_common>
+        #include <opencl_work_item>
+        #include <opencl_image>
+        using namespace cl;
+        )";
+        s << R"(
+        typedef )" << type_name<typename image_test::element_type>() <<  R"( element_type;
+        kernel void test(
+            const )" << image_test::image_type_name() << R"(<element_type, image_access::read> img,
+            const global_ptr<int4[]> coords,
+            global_ptr<element_type[]> output
+        ) {
+            const ulong gid = get_global_linear_id();
+            output[gid] =[gid].)" << image_test::coord_accessor() << R"();
+        }
+        )";
+        return s.str(); // NOTE(review): the generated assignment to output[gid] has no image read expression before "[gid]." - the source line looks truncated; confirm against the original file
+    }
+    // Runs the read test for one channel order.
+    // Builds kernel "test" from generate_source(), creates an image filled with
+    // generated channel values and a buffer of generated coordinates, launches
+    // one work-item per coordinate and compares each pixel the kernel stored
+    // with read_image_pixel() applied to the same host data.
+    // num_elements is the number of coordinates, and so of work-items.
+    // Returns CL_SUCCESS when every pixel matches. The RETURN_ON_* macros
+    // presumably return early on failure (they are defined elsewhere); objects
+    // created before such an early return are not released.
+    // NOTE(review): several lines below look truncated in this copy (the
+    // static_cast<void *>( arguments, and the #if/#else/#endif around the three
+    // kernel-creation variants); they are left exactly as found.
+    int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    {
+        int error = CL_SUCCESS;
+        cl_program program;
+        cl_kernel kernel;
+        std::string kernel_name = "test";
+        std::string source = generate_source();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name
+        );
+        RETURN_ON_ERROR(error)
+        return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name, "-cl-std=CL2.0", false
+        );
+        RETURN_ON_ERROR(error)
+// Normal run
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name
+        );
+        RETURN_ON_ERROR(error)
+        using element_type = typename image_test::element_type;
+        using coord_type = cl_int4;
+        using scalar_element_type = typename scalar_type<element_type>::type;
+        using channel_type = typename image_test::channel_type;
+        cl_image_format image_format;
+        image_format.image_channel_order = channel_order;
+        image_format.image_channel_data_type = ChannelType;
+        const size_t pixel_size = get_pixel_size(&image_format);
+        const size_t channel_count = get_channel_order_channel_count(image_format.image_channel_order);
+        cl_image_desc image_desc;
+        image_desc.image_type = ImageType;
+        // Fixed image extents per image type; unused dimensions are set to 1.
+        if (ImageType == CL_MEM_OBJECT_IMAGE1D)
+        {
+            image_desc.image_width = 2048;
+            image_desc.image_height = 1;
+            image_desc.image_depth = 1;
+        }
+        else if (ImageType == CL_MEM_OBJECT_IMAGE2D)
+        {
+            image_desc.image_width = 256;
+            image_desc.image_height = 256;
+            image_desc.image_depth = 1;
+        }
+        else if (ImageType == CL_MEM_OBJECT_IMAGE3D)
+        {
+            image_desc.image_width = 64;
+            image_desc.image_height = 64;
+            image_desc.image_depth = 64;
+        }
+        image_desc.image_array_size = 0;
+        // Tightly packed host data: no padding between rows or slices.
+        image_desc.image_row_pitch = image_desc.image_width * pixel_size;
+        image_desc.image_slice_pitch = image_desc.image_row_pitch * image_desc.image_height;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.mem_object = NULL;
+        image_descriptor image_info = create_image_descriptor(image_desc, &image_format);
+        // One generated value per channel of every pixel.
+        std::vector<channel_type> image_values = generate_input(
+            image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count,
+            image_test::channel_min(), image_test::channel_max(),
+            std::vector<channel_type>()
+        );
+        const size_t count = num_elements;
+        // Generated coordinates between (0, 0, 0, 0) and the last pixel of the image.
+        std::vector<coord_type> coords = generate_input(
+            count,
+            detail::make_value<coord_type>(0),
+            coord_type {
+                static_cast<cl_int>(image_desc.image_width - 1),
+                static_cast<cl_int>(image_desc.image_height - 1),
+                static_cast<cl_int>(image_desc.image_depth - 1),
+                0
+            },
+            std::vector<coord_type>()
+        );
+        cl_mem img = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+            &image_format, &image_desc, static_cast<void *>(, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateImage")
+        cl_mem coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+            sizeof(coord_type) * count, static_cast<void *>(, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+        // The kernel stores its results in this buffer, so it must be writable
+        // from kernels: writing to a CL_MEM_READ_ONLY buffer inside a kernel is
+        // undefined. Reading it back from the host is still allowed.
+        cl_mem output_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(element_type) * count, NULL, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+        error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &img);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 1, sizeof(coords_buffer), &coords_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        const size_t global_size = count;
+        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
+        RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+        std::vector<element_type> output(count);
+        // Blocking read (CL_TRUE): the data is available once the call returns.
+        error = clEnqueueReadBuffer(
+            queue, output_buffer, CL_TRUE,
+            0, sizeof(element_type) * count,
+            static_cast<void *>(,
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+        // Compare every value the kernel stored with the host-side reference.
+        for (size_t i = 0; i < count; i++)
+        {
+            const coord_type c = coords[i];
+            const element_type result = output[i];
+            element_type expected;
+            read_image_pixel<scalar_element_type>(static_cast<void *>(, &image_info,
+                c.s[0], c.s[1], c.s[2],
+                expected.s);
+            if (!are_equal(result, expected))
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "Reading from coordinates %s failed. Expected: %s, got: %s",
+                    format_value(c).c_str(), format_value(expected).c_str(), format_value(result).c_str()
+                );
+            }
+        }
+        clReleaseMemObject(img);
+        clReleaseMemObject(coords_buffer);
+        clReleaseMemObject(output_buffer);
+        clReleaseKernel(kernel);
+        clReleaseProgram(program);
+        return error;
+    }
+template<cl_mem_object_type ImageType>
+int run_test_cases(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // runs the read test for ImageType with every channel order and every channel data type
+    if (!is_test_supported(device)) // an unsupported device is skipped and reported as success
+        return CL_SUCCESS;
+    int error = CL_SUCCESS;
+    for (auto channel_order : get_channel_orders(device)) // each channel order is combined with all seven channel data types below
+    {
+        error = image_test<ImageType, CL_SIGNED_INT8>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error) // presumably returns from this function when error is set - confirm against the macro definition
+        error = image_test<ImageType, CL_SIGNED_INT16>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        error = image_test<ImageType, CL_SIGNED_INT32>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        error = image_test<ImageType, CL_UNSIGNED_INT8>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        error = image_test<ImageType, CL_UNSIGNED_INT16>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        error = image_test<ImageType, CL_UNSIGNED_INT32>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        error = image_test<ImageType, CL_FLOAT>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+    }
+    return error; // result of the last run() call
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // test-case body for 1D images
+    return run_test_cases<CL_MEM_OBJECT_IMAGE1D>(device, context, queue, num_elements);
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // test-case body for 2D images
+    return run_test_cases<CL_MEM_OBJECT_IMAGE2D>(device, context, queue, num_elements);
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // test-case body for 3D images
+    return run_test_cases<CL_MEM_OBJECT_IMAGE3D>(device, context, queue, num_elements);
+} // namespace
diff --git a/test_conformance/clcpp/images/test_sample.hpp b/test_conformance/clcpp/images/test_sample.hpp
new file mode 100644
index 0000000..a96a563
--- /dev/null
+++ b/test_conformance/clcpp/images/test_sample.hpp
@@ -0,0 +1,363 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <vector>
+#include "common.hpp"
+namespace test_images_sample {
+enum class sampler_source // selects which sampler the generated kernel passes to its image read
+    param, // the kernel's own sampler_param argument
+    program_scope // a sampler declared at program scope in the generated source
+const sampler_source sampler_sources[] = { sampler_source::param, sampler_source::program_scope }; // both sampler_source values, in declaration order
+template<cl_mem_object_type ImageType, cl_channel_type ChannelType>
+struct image_test : image_test_base<ImageType, ChannelType>
+    cl_channel_order channel_order;
+    sampler_source source;
+    // Stores the channel order used for the image format and the choice of
+    // sampler that generate_source() builds into the kernel.
+    image_test(cl_channel_order channel_order, sampler_source source)
+        : channel_order(channel_order)
+        , source(source)
+    {
+    }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    // Builds the OpenCL C source of kernel "test": each work-item samples the
+    // image at coords[gid] with read_image<suffix> and stores the pixel in
+    // output[gid]. Depending on `source`, the kernel samples through its
+    // sampler_param argument or through a sampler declared at program scope,
+    // whose declaration is then emitted ahead of the kernel.
+    std::string generate_source()
+    {
+        std::stringstream kernel_src;
+        kernel_src << R"(
+        typedef )" << type_name<typename image_test::element_type>() << R"( element_type;
+        )";
+        // Name of the sampler used inside the kernel; stays empty for a source
+        // value that is neither program_scope nor param.
+        std::string sampler_name;
+        switch (source)
+        {
+            case sampler_source::program_scope:
+                kernel_src << R"(
+        constant sampler_t sampler_program_scope = CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE;
+            )";
+                sampler_name = "sampler_program_scope";
+                break;
+            case sampler_source::param:
+                sampler_name = "sampler_param";
+                break;
+            default:
+                break;
+        }
+        kernel_src << R"(
+        kernel void test(
+            read_only )" << image_test::image_type_name() << R"(_t img,
+            const global int4 *coords,
+            global element_type *output,
+            sampler_t sampler_param
+        ) {
+            const ulong gid = get_global_linear_id();
+            output[gid] = read_image)" << image_test::function_suffix() <<
+                "(img, " << sampler_name << ", coords[gid]." << image_test::coord_accessor() << R"();
+        }
+        )";
+        return kernel_src.str();
+    }
+    // Builds the OpenCL C++ source of kernel "test": each work-item calls
+    // img.sample() at coords[gid] and stores the pixel in output[gid].
+    // Depending on `source`, the kernel samples through its sampler_param
+    // argument or through a sampler declared at program scope, whose declaration
+    // is then emitted ahead of the kernel.
+    std::string generate_source()
+    {
+        std::stringstream kernel_src;
+        kernel_src << R"(
+        #include <opencl_memory>
+        #include <opencl_common>
+        #include <opencl_work_item>
+        #include <opencl_image>
+        using namespace cl;
+        )";
+        kernel_src << R"(
+        typedef )" << type_name<typename image_test::element_type>() <<  R"( element_type;
+        )";
+        // Name of the sampler used inside the kernel; stays empty for a source
+        // value that is neither program_scope nor param.
+        std::string sampler_name;
+        switch (source)
+        {
+            case sampler_source::program_scope:
+                kernel_src << R"(
+        sampler sampler_program_scope = make_sampler<addressing_mode::none, normalized_coordinates::unnormalized, filtering_mode::nearest>();
+            )";
+                sampler_name = "sampler_program_scope";
+                break;
+            case sampler_source::param:
+                sampler_name = "sampler_param";
+                break;
+            default:
+                break;
+        }
+        kernel_src << R"(
+        kernel void test(
+            const )" << image_test::image_type_name() << R"(<element_type, image_access::sample> img,
+            const global_ptr<int4[]> coords,
+            global_ptr<element_type[]> output,
+            sampler sampler_param
+        ) {
+            const ulong gid = get_global_linear_id();
+            output[gid] = img.sample()" << sampler_name << ", coords[gid]." << image_test::coord_accessor() << R"();
+        }
+        )";
+        return kernel_src.str();
+    }
+    int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    {
+        int error = CL_SUCCESS;
+        cl_program program;
+        cl_kernel kernel;
+        std::string kernel_name = "test";
+        std::string source = generate_source();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name
+        );
+        RETURN_ON_ERROR(error)
+        return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name, "-cl-std=CL2.0", false
+        );
+        RETURN_ON_ERROR(error)
+// Normal run
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name
+        );
+        RETURN_ON_ERROR(error)
+        using element_type = typename image_test::element_type;
+        using coord_type = cl_int4;
+        using scalar_element_type = typename scalar_type<element_type>::type;
+        using channel_type = typename image_test::channel_type;
+        cl_image_format image_format;
+        image_format.image_channel_order = channel_order;
+        image_format.image_channel_data_type = ChannelType;
+        const size_t pixel_size = get_pixel_size(&image_format);
+        const size_t channel_count = get_channel_order_channel_count(image_format.image_channel_order);
+        cl_image_desc image_desc;
+        image_desc.image_type = ImageType;
+        if (ImageType == CL_MEM_OBJECT_IMAGE1D)
+        {
+            image_desc.image_width = 2048;
+            image_desc.image_height = 1;
+            image_desc.image_depth = 1;
+        }
+        else if (ImageType == CL_MEM_OBJECT_IMAGE2D)
+        {
+            image_desc.image_width = 256;
+            image_desc.image_height = 256;
+            image_desc.image_depth = 1;
+        }
+        else if (ImageType == CL_MEM_OBJECT_IMAGE3D)
+        {
+            image_desc.image_width = 64;
+            image_desc.image_height = 64;
+            image_desc.image_depth = 64;
+        }
+        image_desc.image_array_size = 0;
+        image_desc.image_row_pitch = image_desc.image_width * pixel_size;
+        image_desc.image_slice_pitch = image_desc.image_row_pitch * image_desc.image_height;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.mem_object = NULL;
+        image_descriptor image_info = create_image_descriptor(image_desc, &image_format);
+        std::vector<channel_type> image_values = generate_input(
+            image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count,
+            image_test::channel_min(), image_test::channel_max(),
+            std::vector<channel_type>()
+        );
+        const size_t count = num_elements;
+        std::vector<coord_type> coords = generate_input(
+            count,
+            detail::make_value<coord_type>(0),
+            coord_type {
+                static_cast<cl_int>(image_desc.image_width - 1),
+                static_cast<cl_int>(image_desc.image_height - 1),
+                static_cast<cl_int>(image_desc.image_depth - 1),
+                0
+            },
+            std::vector<coord_type>()
+        );
+        cl_mem img = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+            &image_format, &image_desc, static_cast<void *>(, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateImage")
+        cl_mem coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+            sizeof(coord_type) * count, static_cast<void *>(, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+        cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(element_type) * count, NULL, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+        const cl_sampler_properties sampler_properties[] = {
+            0
+        };
+        cl_sampler sampler = clCreateSamplerWithProperties(context, sampler_properties, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateSamplerWithProperties")
+        error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &img);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 1, sizeof(coords_buffer), &coords_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 3, sizeof(sampler), &sampler);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        const size_t global_size = count;
+        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
+        RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+        std::vector<element_type> output(count);
+        error = clEnqueueReadBuffer(
+            queue, output_buffer, CL_TRUE,
+            0, sizeof(element_type) * count,
+            static_cast<void *>(,
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+        for (size_t i = 0; i < count; i++)
+        {
+            const coord_type c = coords[i];
+            const element_type result = output[i];
+            element_type expected;
+            read_image_pixel<scalar_element_type>(static_cast<void *>(, &image_info,
+                c.s[0], c.s[1], c.s[2],
+                expected.s);
+            if (!are_equal(result, expected))
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "Sampling from coordinates %s failed. Expected: %s, got: %s",
+                    format_value(c).c_str(), format_value(expected).c_str(), format_value(result).c_str()
+                );
+            }
+        }
+        clReleaseMemObject(img);
+        clReleaseMemObject(coords_buffer);
+        clReleaseMemObject(output_buffer);
+        clReleaseSampler(sampler);
+        clReleaseKernel(kernel);
+        clReleaseProgram(program);
+        return error;
+    }
+// Runs the image read test for one image type (ImageType) over every channel
+// order returned by get_channel_orders(device), every entry of sampler_sources
+// and each of the seven channel data types listed below.
+// Returns CL_SUCCESS without running anything when is_test_supported(device)
+// is false. RETURN_ON_ERROR presumably returns from this function on the
+// first failing combination (macro defined elsewhere - confirm).
+template<cl_mem_object_type ImageType>
+int run_test_cases(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    if (!is_test_supported(device))
+        return CL_SUCCESS;
+    int error = CL_SUCCESS;
+    for (auto channel_order : get_channel_orders(device))
+    {
+        for (auto source : sampler_sources)
+        {
+            // Signed integer channel types.
+            error = image_test<ImageType, CL_SIGNED_INT8>(channel_order, source)
+                .run(device, context, queue, num_elements);
+            RETURN_ON_ERROR(error)
+            error = image_test<ImageType, CL_SIGNED_INT16>(channel_order, source)
+                .run(device, context, queue, num_elements);
+            RETURN_ON_ERROR(error)
+            error = image_test<ImageType, CL_SIGNED_INT32>(channel_order, source)
+                .run(device, context, queue, num_elements);
+            RETURN_ON_ERROR(error)
+            // Unsigned integer channel types.
+            error = image_test<ImageType, CL_UNSIGNED_INT8>(channel_order, source)
+                .run(device, context, queue, num_elements);
+            RETURN_ON_ERROR(error)
+            error = image_test<ImageType, CL_UNSIGNED_INT16>(channel_order, source)
+                .run(device, context, queue, num_elements);
+            RETURN_ON_ERROR(error)
+            error = image_test<ImageType, CL_UNSIGNED_INT32>(channel_order, source)
+                .run(device, context, queue, num_elements);
+            RETURN_ON_ERROR(error)
+            // Floating-point channel type.
+            error = image_test<ImageType, CL_FLOAT>(channel_order, source)
+                .run(device, context, queue, num_elements);
+            RETURN_ON_ERROR(error)
+        }
+    }
+    return error;
+}
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // NOTE(review): the declarator naming this test entry and its braces are not present in this hunk - confirm
+    return run_test_cases<CL_MEM_OBJECT_IMAGE1D>(device, context, queue, num_elements); // forwards to the 1D image instantiation
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // NOTE(review): declarator and braces missing here as well - confirm
+    return run_test_cases<CL_MEM_OBJECT_IMAGE2D>(device, context, queue, num_elements); // forwards to the 2D image instantiation
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // NOTE(review): declarator and braces missing here as well - confirm
+    return run_test_cases<CL_MEM_OBJECT_IMAGE3D>(device, context, queue, num_elements); // forwards to the 3D image instantiation
+} // namespace
diff --git a/test_conformance/clcpp/images/test_write.hpp b/test_conformance/clcpp/images/test_write.hpp
new file mode 100644
index 0000000..0f54487
--- /dev/null
+++ b/test_conformance/clcpp/images/test_write.hpp
@@ -0,0 +1,327 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <algorithm>
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <vector>
+#include "common.hpp"
+namespace test_images_write {
+template<cl_mem_object_type ImageType, cl_channel_type ChannelType> // image write test, parameterised on image type and channel data type
+struct image_test : image_test_base<ImageType, ChannelType> // NOTE(review): the struct's opening brace is not present in this hunk - confirm
+    cl_channel_order channel_order; // channel order assigned to the image format in run()
+    image_test(cl_channel_order channel_order) : // stores the channel order to test
+        channel_order(channel_order)
+    { }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    // Assembles the OpenCL C flavour of the test kernel: a kernel named "test"
+    // that calls write_image<suffix> on a write_only image, using coords[gid]
+    // as the coordinate and input[gid] as the pixel value.
+    // The element type, image type name, function suffix and coordinate
+    // accessor are spliced in from the image_test helpers.
+    std::string generate_source()
+    {
+        std::stringstream kernel_src;
+        // Element typedef and kernel signature up to the image parameter.
+        kernel_src << R"(
+        typedef )" << type_name<typename image_test::element_type>() << R"( element_type;
+        kernel void test(
+            write_only )";
+        // Image parameter type, remaining parameters and start of the write call.
+        kernel_src << image_test::image_type_name() << R"(_t img,
+            const global int4 *coords,
+            const global element_type *input
+        ) {
+            const ulong gid = get_global_linear_id();
+            write_image)";
+        // Call suffix, coordinate accessor and end of the kernel.
+        kernel_src << image_test::function_suffix();
+        kernel_src << "(img, coords[gid]." << image_test::coord_accessor() << R"(, input[gid]);
+        }
+        )";
+        return kernel_src.str();
+    }
+    // Assembles the OpenCL C++ flavour of the test kernel: a kernel named
+    // "test" that calls img.write() with coords[gid] (through the coordinate
+    // accessor) and input[gid].
+    // NOTE(review): this has the same signature as the generate_source() just
+    // above, so the two must sit in different preprocessor branches; the
+    // separating directive is not present in this hunk - confirm.
+    std::string generate_source()
+    {
+        std::stringstream kernel_src;
+        // Headers and namespace needed by the kernel.
+        kernel_src << R"(
+        #include <opencl_memory>
+        #include <opencl_common>
+        #include <opencl_work_item>
+        #include <opencl_image>
+        using namespace cl;
+        )";
+        // Element typedef and kernel signature up to the image parameter.
+        kernel_src << R"(
+        typedef )" << type_name<typename image_test::element_type>() << R"( element_type;
+        kernel void test(
+            )";
+        // Image parameter, remaining parameters and start of the write call.
+        kernel_src << image_test::image_type_name() << R"(<element_type, image_access::write> img,
+            const global_ptr<int4[]> coords,
+            const global_ptr<element_type[]> input
+        ) {
+            const ulong gid = get_global_linear_id();
+            img.write(coords[gid].)";
+        // Coordinate accessor and end of the kernel.
+        kernel_src << image_test::coord_accessor() << R"(, input[gid]);
+        }
+        )";
+        return kernel_src.str();
+    }
+    // Builds the "test" kernel, has it write `num_elements` pixels into a newly
+    // created write-only image at random coordinates, reads the whole image
+    // back and compares each written pixel with the value that was supplied.
+    //
+    // The values to write are sampled with read_image_pixel from a random
+    // reference image at the same coordinates, so a coordinate that occurs
+    // twice always carries the same value.
+    //
+    // Returns CL_SUCCESS on success. The RETURN_ON_* macros are defined
+    // elsewhere; they presumably return early with an error code - confirm.
+    // NOTE(review): such early returns would skip the clRelease* calls at the
+    // end of this function.
+    int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    {
+        int error = CL_SUCCESS;
+        cl_program program;
+        cl_kernel kernel;
+        std::string kernel_name = "test";
+        std::string source = generate_source();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// NOTE(review): the three create_opencl_kernel alternatives below appear to be
+// mutually exclusive preprocessor branches, but only the #elif line is present
+// in this hunk; the opening #if, the #else and the #endif must be confirmed.
+// Only OpenCL C++ to SPIR-V compilation
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name
+        );
+        RETURN_ON_ERROR(error)
+        return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name, "-cl-std=CL2.0", false
+        );
+        RETURN_ON_ERROR(error)
+// Normal run
+        error = create_opencl_kernel(
+            context, &program, &kernel,
+            source, kernel_name
+        );
+        RETURN_ON_ERROR(error)
+        using element_type = typename image_test::element_type;
+        using coord_type = cl_int4;
+        using scalar_element_type = typename scalar_type<element_type>::type;
+        using channel_type = typename image_test::channel_type;
+        cl_image_format image_format;
+        image_format.image_channel_order = channel_order;
+        image_format.image_channel_data_type = ChannelType;
+        const size_t pixel_size = get_pixel_size(&image_format);
+        const size_t channel_count = get_channel_order_channel_count(image_format.image_channel_order);
+        // Image dimensions depend on the image type; unused dimensions are 1.
+        cl_image_desc image_desc;
+        image_desc.image_type = ImageType;
+        if (ImageType == CL_MEM_OBJECT_IMAGE1D)
+        {
+            image_desc.image_width = 2048;
+            image_desc.image_height = 1;
+            image_desc.image_depth = 1;
+        }
+        else if (ImageType == CL_MEM_OBJECT_IMAGE2D)
+        {
+            image_desc.image_width = 256;
+            image_desc.image_height = 256;
+            image_desc.image_depth = 1;
+        }
+        else if (ImageType == CL_MEM_OBJECT_IMAGE3D)
+        {
+            image_desc.image_width = 64;
+            image_desc.image_height = 64;
+            image_desc.image_depth = 64;
+        }
+        image_desc.image_array_size = 0;
+        // Tightly packed pitches; they describe the host-side layout used by
+        // image_info and are reset to 0 before clCreateImage below.
+        image_desc.image_row_pitch = image_desc.image_width * pixel_size;
+        image_desc.image_slice_pitch = image_desc.image_row_pitch * image_desc.image_height;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.mem_object = NULL;
+        image_descriptor image_info = create_image_descriptor(image_desc, &image_format);
+        // Random reference image from which the values to write are sampled.
+        std::vector<channel_type> random_image_values = generate_input(
+            image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count,
+            image_test::channel_min(), image_test::channel_max(),
+            std::vector<channel_type>()
+        );
+        const size_t count = num_elements;
+        // Random coordinates inside the image; the fourth component is always 0.
+        std::vector<coord_type> coords = generate_input(
+            count,
+            detail::make_value<coord_type>(0),
+            coord_type {
+                static_cast<cl_int>(image_desc.image_width - 1),
+                static_cast<cl_int>(image_desc.image_height - 1),
+                static_cast<cl_int>(image_desc.image_depth - 1),
+                0
+            },
+            std::vector<coord_type>()
+        );
+        std::vector<element_type> input(count);
+        for (size_t i = 0; i < count; i++)
+        {
+            const coord_type c = coords[i];
+            // Use read_image_pixel from harness/imageHelpers to fill input values
+            // (it will deal with correct channels, orders etc.)
+            read_image_pixel<scalar_element_type>(static_cast<void *>(random_image_values.data()), &image_info,
+                c.s[0], c.s[1], c.s[2],
+                input[i].s);
+        }
+        // image_row_pitch and image_slice_pitch must be 0, when clCreateImage is used with host_ptr = NULL
+        image_desc.image_row_pitch = 0;
+        image_desc.image_slice_pitch = 0;
+        cl_mem img = clCreateImage(context, CL_MEM_WRITE_ONLY,
+            &image_format, &image_desc, NULL, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateImage")
+        cl_mem coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+            sizeof(coord_type) * count, static_cast<void *>(coords.data()), &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+        // The kernel only reads from this buffer, so it is created read-only;
+        // reading a CL_MEM_WRITE_ONLY buffer inside a kernel is undefined.
+        cl_mem input_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+            sizeof(element_type) * count, static_cast<void *>(input.data()), &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+        error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &img);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 1, sizeof(coords_buffer), &coords_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 2, sizeof(input_buffer), &input_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        // One work-item per coordinate/value pair.
+        const size_t global_size = count;
+        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
+        RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+        // Blocking read of the whole image back into host memory.
+        std::vector<channel_type> image_values(image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count);
+        const size_t origin[3] = { 0 };
+        const size_t region[3] = { image_desc.image_width, image_desc.image_height, image_desc.image_depth };
+        error = clEnqueueReadImage(
+            queue, img, CL_TRUE,
+            origin, region, 0, 0,
+            static_cast<void *>(image_values.data()),
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueReadImage")
+        // Every written coordinate must now hold the value that was passed in.
+        for (size_t i = 0; i < count; i++)
+        {
+            const coord_type c = coords[i];
+            const element_type expected = input[i];
+            element_type result;
+            read_image_pixel<scalar_element_type>(static_cast<void *>(image_values.data()), &image_info,
+                c.s[0], c.s[1], c.s[2],
+                result.s);
+            if (!are_equal(result, expected))
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "Writing to coordinates %s failed. Expected: %s, got: %s",
+                    format_value(c).c_str(), format_value(expected).c_str(), format_value(result).c_str()
+                );
+            }
+        }
+        clReleaseMemObject(img);
+        clReleaseMemObject(coords_buffer);
+        clReleaseMemObject(input_buffer);
+        clReleaseKernel(kernel);
+        clReleaseProgram(program);
+        return error;
+    }
+// Runs the image write test for one image type (ImageType) over every channel
+// order returned by get_channel_orders(device) and each of the seven channel
+// data types listed below.
+// Returns CL_SUCCESS without running anything when is_test_supported(device)
+// is false. RETURN_ON_ERROR presumably returns from this function on the
+// first failing combination (macro defined elsewhere - confirm).
+template<cl_mem_object_type ImageType>
+int run_test_cases(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    if (!is_test_supported(device))
+        return CL_SUCCESS;
+    int error = CL_SUCCESS;
+    for (auto channel_order : get_channel_orders(device))
+    {
+        // Signed integer channel types.
+        error = image_test<ImageType, CL_SIGNED_INT8>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        error = image_test<ImageType, CL_SIGNED_INT16>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        error = image_test<ImageType, CL_SIGNED_INT32>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        // Unsigned integer channel types.
+        error = image_test<ImageType, CL_UNSIGNED_INT8>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        error = image_test<ImageType, CL_UNSIGNED_INT16>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        error = image_test<ImageType, CL_UNSIGNED_INT32>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+        // Floating-point channel type.
+        error = image_test<ImageType, CL_FLOAT>(channel_order)
+            .run(device, context, queue, num_elements);
+        RETURN_ON_ERROR(error)
+    }
+    return error;
+}
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // NOTE(review): the declarator naming this test entry and its braces are not present in this hunk - confirm
+    return run_test_cases<CL_MEM_OBJECT_IMAGE1D>(device, context, queue, num_elements); // forwards to the 1D image instantiation
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // NOTE(review): declarator and braces missing here as well - confirm
+    return run_test_cases<CL_MEM_OBJECT_IMAGE2D>(device, context, queue, num_elements); // forwards to the 2D image instantiation
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // NOTE(review): declarator and braces missing here as well - confirm
+    return run_test_cases<CL_MEM_OBJECT_IMAGE3D>(device, context, queue, num_elements); // forwards to the 3D image instantiation
+} // namespace
diff --git a/test_conformance/clcpp/integer_funcs/24bit_funcs.hpp b/test_conformance/clcpp/integer_funcs/24bit_funcs.hpp
new file mode 100644
index 0000000..98da450
--- /dev/null
+++ b/test_conformance/clcpp/integer_funcs/24bit_funcs.hpp
@@ -0,0 +1,142 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "common.hpp"
+#include <type_traits>
+// Host-side reference for mad24(x, y, z), passed to TEST_TERNARY_FUNC_MACRO.
+// All operand types and the result type must be identical and must be cl_int
+// or cl_uint (both enforced with static_assert in operator()).
+template<class IN1, class IN2, class IN3, class OUT1>
+struct int_func_mad24 : public ternary_func<IN1, IN2, IN3, OUT1>
+{
+    // Name of the function under test.
+    std::string str()
+    {
+        return "mad24";
+    }
+    // Include line for the header that declares the function.
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Reference result: x * y + z, reduced modulo 2^32.
+    OUT1 operator()(const IN1& x, const IN2& y, const IN3& z)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value
+                && std::is_same<IN2, IN3>::value
+                && std::is_same<IN3, OUT1>::value,
+            "All types must be the same"
+        );
+        static_assert(
+            std::is_same<cl_uint, IN1>::value || std::is_same<cl_int, IN1>::value,
+            "Function takes only signed/unsigned integers."
+        );
+        // Compute in cl_uint: for cl_int operands up to max1()/max2() the
+        // product can exceed INT_MAX, which would be undefined behaviour in
+        // signed arithmetic. Unsigned arithmetic wraps instead.
+        const cl_uint product = static_cast<cl_uint>(x) * static_cast<cl_uint>(y);
+        return static_cast<OUT1>(product + static_cast<cl_uint>(z));
+    }
+    // Lower bound for generated x values.
+    IN1 min1()
+    {
+        return 0;
+    }
+    // Upper bound for generated x values.
+    // NOTE(review): the mask 0x00FFFF keeps 16 bits, not 24 - confirm intent.
+    IN1 max1()
+    {
+        return (std::numeric_limits<IN1>::max)() & IN1(0x00FFFF);
+    }
+    // Lower bound for generated y values.
+    IN2 min2()
+    {
+        return 0;
+    }
+    // Upper bound for generated y values (same 16-bit mask as max1()).
+    IN2 max2()
+    {
+        return (std::numeric_limits<IN2>::max)() & IN2(0x00FFFF);
+    }
+};
+// Host-side reference for mul24(x, y), passed to TEST_BINARY_FUNC_MACRO.
+// Both operand types and the result type must be identical and must be cl_int
+// or cl_uint (both enforced with static_assert in operator()).
+template<class IN1, class IN2, class OUT1>
+struct int_func_mul24 : public binary_func<IN1, IN2, OUT1>
+{
+    // Name of the function under test.
+    std::string str()
+    {
+        return "mul24";
+    }
+    // Include line for the header that declares the function.
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Reference result: x * y, reduced modulo 2^32.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value
+                && std::is_same<IN2, OUT1>::value,
+            "All types must be the same"
+        );
+        static_assert(
+            std::is_same<cl_uint, IN1>::value || std::is_same<cl_int, IN1>::value,
+            "Function takes only signed/unsigned integers."
+        );
+        // Multiply in cl_uint: for cl_int operands up to max1()/max2() the
+        // product can exceed INT_MAX, which would be undefined behaviour in
+        // signed arithmetic. Unsigned arithmetic wraps instead.
+        return static_cast<OUT1>(static_cast<cl_uint>(x) * static_cast<cl_uint>(y));
+    }
+    // Lower bound for generated x values.
+    IN1 min1()
+    {
+        return 0;
+    }
+    // Upper bound for generated x values.
+    // NOTE(review): the mask 0x00FFFF keeps 16 bits, not 24 - confirm intent.
+    IN1 max1()
+    {
+        return (std::numeric_limits<IN1>::max)() & IN1(0x00FFFF);
+    }
+    // Lower bound for generated y values.
+    IN2 min2()
+    {
+        return 0;
+    }
+    // Upper bound for generated y values (same 16-bit mask as max1()).
+    IN2 max2()
+    {
+        return (std::numeric_limits<IN2>::max)() & IN2(0x00FFFF);
+    }
+};
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // NOTE(review): the declarator naming this test entry and its braces are not present in this hunk - confirm
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS; // not referenced directly below; presumably used by the TEST_*_FUNC_MACRO expansions - confirm
+    // intn mad24(intn x, intn y, intn z);
+    // uintn mad24(uintn x, uintn y, uintn z);
+    TEST_TERNARY_FUNC_MACRO((int_func_mad24<cl_int, cl_int, cl_int, cl_int>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_mad24<cl_uint, cl_uint, cl_uint, cl_uint>()))
+    // intn mul24(intn x, intn y);
+    // uintn mul24(uintn x, uintn y);
+    TEST_BINARY_FUNC_MACRO((int_func_mul24<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_mul24<cl_uint, cl_uint, cl_uint>()))
+    if(error != CL_SUCCESS) // any non-success code is reported to the caller as -1
+    {
+        return -1;
+    }    
+    return error;
diff --git a/test_conformance/clcpp/integer_funcs/CMakeLists.txt b/test_conformance/clcpp/integer_funcs/CMakeLists.txt
new file mode 100644
index 0000000..ba4cfe8
--- /dev/null
+++ b/test_conformance/clcpp/integer_funcs/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/integer_funcs/bitwise_funcs.hpp b/test_conformance/clcpp/integer_funcs/bitwise_funcs.hpp
new file mode 100644
index 0000000..13ca156
--- /dev/null
+++ b/test_conformance/clcpp/integer_funcs/bitwise_funcs.hpp
@@ -0,0 +1,232 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "common.hpp"
+#include <type_traits>
+// Host-side reference for popcount(x), passed to TEST_UNARY_FUNC_MACRO.
+template<class IN1, class OUT1>
+struct int_func_popcount : public unary_func<IN1, OUT1>
+{
+    // Name of the function under test.
+    std::string str()
+    {
+        return "popcount";
+    }
+    // Include line for the header that declares the function.
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Counts the set bits of x by repeatedly clearing the lowest set bit.
+    OUT1 operator()(IN1 x)
+    {
+        // Work on the unsigned counterpart: for the most negative signed
+        // value, `x - 1` would overflow, which is undefined behaviour.
+        typename std::make_unsigned<IN1>::type bits =
+            static_cast<typename std::make_unsigned<IN1>::type>(x);
+        OUT1 count = 0;
+        for (; bits != 0; count++)
+        {
+            bits &= bits - 1;
+        }
+        return count;
+    }
+};
+// Host-side reference for clz(x), passed to TEST_UNARY_FUNC_MACRO.
+template<class IN1, class OUT1>
+struct int_func_clz : public unary_func<IN1, OUT1>
+{
+    // Name of the function under test.
+    std::string str()
+    {
+        return "clz";
+    }
+    // Include line for the header that declares the function.
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Number of leading zero bits of x, counted from the most significant bit
+    // of IN1. For x == 0 the result is the bit width of IN1.
+    OUT1 operator()(IN1 x)
+    {
+        typedef typename std::make_unsigned<IN1>::type unsigned_type;
+        const size_t bit_width = 8 * sizeof(IN1);
+        // Shifting is done on the unsigned counterpart; left-shifting a
+        // negative signed value is undefined behaviour before C++20.
+        unsigned_type value = static_cast<unsigned_type>(x);
+        // Without this guard the loop below would never find a set bit.
+        if (value == 0)
+        {
+            return static_cast<OUT1>(bit_width);
+        }
+        const unsigned_type top_bit =
+            static_cast<unsigned_type>(unsigned_type(1) << (bit_width - 1));
+        OUT1 count = 0;
+        for (; 0 == (value & top_bit); count++)
+        {
+            value = static_cast<unsigned_type>(value << 1);
+        }
+        return count;
+    }
+};
+// Host-side reference for ctz(x), passed to TEST_UNARY_FUNC_MACRO.
+template<class IN1, class OUT1>
+struct int_func_ctz : public unary_func<IN1, OUT1>
+{
+    // Name of the function under test.
+    std::string str()
+    {
+        return "ctz";
+    }
+    // Include line for the header that declares the function.
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Number of trailing zero bits of x. For x == 0 the result is the size of
+    // IN1 in bits (not in bytes).
+    OUT1 operator()(IN1 x)
+    {
+        if(x == 0)
+            return static_cast<OUT1>(8 * sizeof(x));
+        // Shift the unsigned counterpart so that the right shift never
+        // depends on how negative values are shifted.
+        typename std::make_unsigned<IN1>::type value =
+            static_cast<typename std::make_unsigned<IN1>::type>(x);
+        OUT1 count = 0;
+        for(; 0 == (value & 0x1); count++)
+        {
+            value >>= 1;
+        }
+        return count;
+    }
+};
+// Host-side reference for rotate(value, shift), passed to
+// TEST_BINARY_FUNC_MACRO. Only unsigned IN1 is accepted (static_assert).
+template<class IN1, class IN2, class OUT1>
+struct int_func_rotate : public binary_func<IN1, IN2, OUT1>
+{
+    // Name of the function under test.
+    std::string str()
+    {
+        return "rotate";
+    }
+    // Include line for the header that declares the function.
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Rotates `value` left by `shift` bits; the shift count is first reduced
+    // modulo the bit width of IN1.
+    OUT1 operator()(IN1 value, IN2 shift)
+    {
+        static_assert(
+            std::is_unsigned<IN1>::value,
+            "Only unsigned integers are supported"
+        );
+        const size_t bit_width = sizeof(value) * 8;
+        shift &= bit_width - 1;
+        // A zero rotation is returned directly: the right shift below would
+        // otherwise shift by the full bit width.
+        if (shift == 0)
+            return value;
+        return (value << shift) | (value >> (bit_width - shift));
+    }
+    // Lower bound for generated shift counts.
+    IN2 min2()
+    {
+        return 0;
+    }
+    // Upper bound for generated shift counts: the bit width of IN1.
+    IN2 max2()
+    {
+        return sizeof(IN1) * 8;
+    }
+};
+// Host-side reference for upsample(hi, lo), passed to TEST_BINARY_FUNC_MACRO.
+// IN1 and IN2 must have the same size, OUT1 must be twice as wide, and IN2
+// must be unsigned (all enforced with static_assert in operator()).
+template<class IN1, class IN2, class OUT1>
+struct int_func_upsample : public binary_func<IN1, IN2, OUT1>
+{
+    // Name of the function under test.
+    std::string str()
+    {
+        return "upsample";
+    }
+    // Include line for the header that declares the function.
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Places `hi` in the upper half of the result and `lo` in the lower half.
+    OUT1 operator()(IN1 hi, IN2 lo)
+    {
+        static_assert(
+            sizeof(IN1) == sizeof(IN2),
+            "sizeof(IN1) != sizeof(IN2)"
+        );
+        static_assert(
+            sizeof(OUT1) == 2 * sizeof(IN1),
+            "sizeof(OUT1) != 2 * sizeof(IN1)"
+        );
+        static_assert(
+            std::is_unsigned<IN2>::value,
+            "IN2 type must be unsigned"
+        );
+        // Shift in the unsigned counterpart of OUT1: left-shifting a negative
+        // `hi` as a signed value is undefined behaviour before C++20.
+        typedef typename std::make_unsigned<OUT1>::type unsigned_out;
+        const unsigned_out combined = static_cast<unsigned_out>(
+            (static_cast<unsigned_out>(hi) << (8 * sizeof(IN1))) | lo
+        );
+        return static_cast<OUT1>(combined);
+    }
+    // Lower bound for generated `lo` values.
+    IN2 min2()
+    {
+        return 0;
+    }
+    // Upper bound for generated `lo` values.
+    // NOTE(review): this is the bit width of IN1, so `lo` only covers a small
+    // part of its range - confirm that this bound is intended here.
+    IN2 max2()
+    {
+        return sizeof(IN1) * 8;
+    }
+};
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // NOTE(review): the declarator naming this test entry and its braces are not present in this hunk - confirm
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS; // not referenced directly below; presumably used by the TEST_*_FUNC_MACRO expansions - confirm
+    TEST_UNARY_FUNC_MACRO((int_func_popcount<cl_int, cl_int>())) // popcount over 32- and 64-bit signed/unsigned types
+    TEST_UNARY_FUNC_MACRO((int_func_popcount<cl_uint, cl_uint>()))
+    TEST_UNARY_FUNC_MACRO((int_func_popcount<cl_long, cl_long>()))
+    TEST_UNARY_FUNC_MACRO((int_func_popcount<cl_ulong, cl_ulong>()))
+    TEST_UNARY_FUNC_MACRO((int_func_clz<cl_int, cl_int>())) // clz over the same four types
+    TEST_UNARY_FUNC_MACRO((int_func_clz<cl_uint, cl_uint>()))
+    TEST_UNARY_FUNC_MACRO((int_func_clz<cl_long, cl_long>()))
+    TEST_UNARY_FUNC_MACRO((int_func_clz<cl_ulong, cl_ulong>()))
+    TEST_UNARY_FUNC_MACRO((int_func_ctz<cl_int, cl_int>())) // ctz over the same four types
+    TEST_UNARY_FUNC_MACRO((int_func_ctz<cl_uint, cl_uint>()))
+    TEST_UNARY_FUNC_MACRO((int_func_ctz<cl_long, cl_long>()))
+    TEST_UNARY_FUNC_MACRO((int_func_ctz<cl_ulong, cl_ulong>()))
+    TEST_BINARY_FUNC_MACRO((int_func_rotate<cl_uint, cl_uint, cl_uint>())) // rotate is only instantiated for unsigned types
+    TEST_BINARY_FUNC_MACRO((int_func_rotate<cl_ulong, cl_ulong, cl_ulong>()))
+    // shortn upsample(charn hi, ucharn lo);
+    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_char, cl_uchar, cl_short>()))
+    // ushortn upsample(ucharn hi, ucharn lo);
+    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_uchar, cl_uchar, cl_ushort>()))
+    // intn upsample(shortn hi, ushortn lo);
+    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_short, cl_ushort, cl_int>()))
+    // uintn upsample(ushortn hi, ushortn lo);
+    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_ushort, cl_ushort, cl_uint>()))
+    // longn upsample(intn hi, uintn lo);
+    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_int, cl_uint, cl_long>()))
+    // ulongn upsample(uintn hi, uintn lo);
+    TEST_BINARY_FUNC_MACRO((int_func_upsample<cl_uint, cl_uint, cl_ulong>()))
+    if(error != CL_SUCCESS) // any non-success code is reported to the caller as -1
+    {
+        return -1;
+    }    
+    return error;
diff --git a/test_conformance/clcpp/integer_funcs/common.hpp b/test_conformance/clcpp/integer_funcs/common.hpp
new file mode 100644
index 0000000..f04811e
--- /dev/null
+++ b/test_conformance/clcpp/integer_funcs/common.hpp
@@ -0,0 +1,26 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <random>
+#include <limits>
+#include <algorithm>
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
diff --git a/test_conformance/clcpp/integer_funcs/main.cpp b/test_conformance/clcpp/integer_funcs/main.cpp
new file mode 100644
index 0000000..ab2664a
--- /dev/null
+++ b/test_conformance/clcpp/integer_funcs/main.cpp
@@ -0,0 +1,26 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "bitwise_funcs.hpp"
+#include "numeric_funcs.hpp"
+#include "24bit_funcs.hpp"
+// Entry point: passes the test definitions of the global autotest suite to
+// runTestHarness together with the command-line arguments.
+int main(int argc, const char *argv[])
+{
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    // NOTE(review): the fourth argument was empty in this hunk; tests.data()
+    // assumes test_defs is a contiguous container such as std::vector - confirm.
+    return runTestHarness(argc, argv, tests.size(), tests.data(), false, false, 0);
+}
diff --git a/test_conformance/clcpp/integer_funcs/numeric_funcs.hpp b/test_conformance/clcpp/integer_funcs/numeric_funcs.hpp
new file mode 100644
index 0000000..21d75c5
--- /dev/null
+++ b/test_conformance/clcpp/integer_funcs/numeric_funcs.hpp
@@ -0,0 +1,703 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "common.hpp"
+#include <type_traits>
+// Host-side reference for abs(): maps a value of type IN1 to its magnitude,
+// expressed in the unsigned type OUT1.
+template<class IN1, class OUT1>
+struct int_func_abs : public unary_func<IN1, OUT1>
+    // Returns the function name, "abs".
+    std::string str()
+    {
+        return "abs";
+    }
+    // Returns the #include line for <opencl_integer>
+    // (presumably prepended to the generated kernel source - confirm).
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns |x|.
+    // The negation is carried out in OUT1 (unsigned, modular) arithmetic:
+    // evaluating `-x` in the signed input type overflows for the most
+    // negative value, which is undefined behaviour.
+    OUT1 operator()(const IN1& x)
+    {
+        static_assert(
+            std::is_unsigned<OUT1>::value,
+            "OUT1 type must be unsigned"
+        );
+        const OUT1 ux = static_cast<OUT1>(x);
+        if(x < IN1(0))
+            return static_cast<OUT1>(OUT1(0) - ux);
+        return ux;
+    }
+// Host-side reference for abs_diff(): |x - y| expressed in the unsigned
+// type OUT1.
+template<class IN1, class IN2, class OUT1>
+struct int_func_abs_diff : public binary_func<IN1, IN2, OUT1>
+    // Returns the function name, "abs_diff".
+    std::string str()
+    {
+        return "abs_diff";
+    }
+    // Returns the #include line for <opencl_integer>
+    // (presumably prepended to the generated kernel source - confirm).
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns the distance between x and y.
+    // Both operands are converted to OUT1 first so the subtraction is done
+    // in unsigned (modular) arithmetic; subtracting in the signed input type
+    // can overflow (e.g. the maximum value minus the minimum value), which
+    // is undefined behaviour.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value,
+            "IN1 must be IN2"
+        );
+        static_assert(
+            std::is_unsigned<OUT1>::value,
+            "OUT1 type must be unsigned"
+        );
+        const OUT1 ux = static_cast<OUT1>(x);
+        const OUT1 uy = static_cast<OUT1>(y);
+        if(x < y)
+            return static_cast<OUT1>(uy - ux);
+        return static_cast<OUT1>(ux - uy);
+    }
+// Host-side reference for add_sat(): x + y, clamped to the representable
+// range of OUT1 instead of wrapping.
+template<class IN1, class IN2, class OUT1>
+struct int_func_add_sat : public binary_func<IN1, IN2, OUT1>
+    // Returns the function name, "add_sat".
+    std::string str()
+    {
+        return "add_sat";
+    }
+    // Returns the #include line for <opencl_integer>
+    // (presumably prepended to the generated kernel source - confirm).
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns x + y, or numeric_limits<OUT1>::max()/min() when the exact sum
+    // does not fit in OUT1.
+    // Overflow is detected BEFORE the addition is performed: adding first and
+    // inspecting the wrapped result is undefined behaviour for signed types.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value,
+            "IN1 must be IN2"
+        );
+        static_assert(
+            std::is_same<OUT1, IN2>::value,
+            "OUT1 must be IN2"
+        );
+        const OUT1 hi = (std::numeric_limits<OUT1>::max)();
+        const OUT1 lo = (std::numeric_limits<OUT1>::min)();
+        // Positive y: the sum exceeds hi exactly when x > hi - y.
+        if(y > OUT1(0) && x > static_cast<OUT1>(hi - y))
+            return hi;
+        // Negative y (never taken for unsigned types): the sum drops below
+        // lo exactly when x < lo - y.
+        if(y < OUT1(0) && x < static_cast<OUT1>(lo - y))
+            return lo;
+        return static_cast<OUT1>(x + y);
+    }
+// Host-side reference for hadd(): (x + y) >> 1 computed without forming the
+// full sum.
+template<class IN1, class IN2, class OUT1>
+struct int_func_hadd : public binary_func<IN1, IN2, OUT1>
+    // Returns the function name, "hadd".
+    std::string str()
+    {
+        return std::string("hadd");
+    }
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return std::string("#include <opencl_integer>\n");
+    }
+    // Adds the two halved operands, then adds back the carry that is lost
+    // when both operands have their lowest bit set.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value,
+            "IN1 must be IN2"
+        );
+        static_assert(
+            std::is_same<OUT1, IN2>::value,
+            "OUT1 must be IN2"
+        );
+        const OUT1 one = OUT1(1);
+        const OUT1 half_x = x >> one;
+        const OUT1 half_y = y >> one;
+        const OUT1 carry = x & y & one;
+        return half_x + half_y + carry;
+    }
+// Host-side reference for rhadd(): (x + y + 1) >> 1 computed without forming
+// the full sum.
+template<class IN1, class IN2, class OUT1>
+struct int_func_rhadd : public binary_func<IN1, IN2, OUT1>
+    // Returns the function name, "rhadd".
+    std::string str()
+    {
+        return std::string("rhadd");
+    }
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return std::string("#include <opencl_integer>\n");
+    }
+    // Adds the two halved operands, then rounds up by one when at least one
+    // operand has its lowest bit set.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value,
+            "IN1 must be IN2"
+        );
+        static_assert(
+            std::is_same<OUT1, IN2>::value,
+            "OUT1 must be IN2"
+        );
+        const OUT1 one = OUT1(1);
+        const OUT1 half_x = x >> one;
+        const OUT1 half_y = y >> one;
+        const OUT1 round_up = (x | y) & one;
+        return half_x + half_y + round_up;
+    }
+// clamp for scalars
+// Primary template: limits x to the closed interval [minval, maxval].
+template<class IN1, class IN2, class IN3, class OUT1, class Enable = void>
+struct int_func_clamp : public ternary_func<IN1, IN2, IN3, OUT1>
+    // Returns the function name, "clamp".
+    std::string str()
+    {
+        return std::string("clamp");
+    }
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return std::string("#include <opencl_integer>\n");
+    }
+    // Raises x to at least minval, then lowers the result to at most maxval.
+    OUT1 operator()(const IN1& x, const IN2& minval, const IN3& maxval)
+    {
+        static_assert(
+            std::is_same<IN2, IN3>::value,
+            "IN3 must be IN2"
+        );
+        static_assert(
+            std::is_same<OUT1, IN1>::value,
+            "OUT1 must be IN1"
+        );
+        const OUT1 lower_bounded = (std::max)(x, minval);
+        return (std::min)(lower_bounded, maxval);
+    }
+    // min2()/max2() span the lower half of the IN2 range and min3()/max3()
+    // the upper half of the IN3 range, so any value from the first interval
+    // is below any value from the second (presumably used as the generation
+    // ranges for minval and maxval - confirm).
+    IN2 min2()
+    {
+        const IN2 lowest = (std::numeric_limits<IN2>::min)();
+        return lowest;
+    }
+    IN2 max2()
+    {
+        const IN2 highest = (std::numeric_limits<IN2>::max)();
+        return highest / IN2(2);
+    }
+    IN3 min3()
+    {
+        const IN3 highest = (std::numeric_limits<IN3>::max)();
+        return IN3(1) + (highest / IN3(2));
+    }
+    IN3 max3()
+    {
+        const IN3 highest = (std::numeric_limits<IN3>::max)();
+        return highest;
+    }
+// gentype clamp(gentype x, scalar minval, scalar maxval);
+// Specialization selected when OUT1 is a vector type: every component of x
+// is clamped against the same pair of scalar bounds.
+template<class IN1, class IN2, class IN3, class OUT1>
+struct int_func_clamp<IN1, IN2, IN3, OUT1, typename std::enable_if<is_vector_type<OUT1>::value>::type> : public ternary_func<IN1, IN2, IN3, OUT1>
+    // Returns the function name, "clamp".
+    std::string str()
+    {
+        return std::string("clamp");
+    }
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return std::string("#include <opencl_integer>\n");
+    }
+    // Clamps each component of x to [minval, maxval].
+    OUT1 operator()(const IN1& x, const IN2& minval, const IN3& maxval)
+    {
+        static_assert(
+            std::is_same<IN2, IN3>::value,
+            "IN3 must be IN2"
+        );
+        static_assert(
+            !is_vector_type<IN2>::value && !is_vector_type<IN3>::value,
+            "IN3 and IN2 must be scalar"
+        );
+        static_assert(
+            std::is_same<OUT1, IN1>::value,
+            "OUT1 must be IN1"
+        );
+        OUT1 clamped;
+        for(size_t lane = 0; lane != vector_size<OUT1>::value; ++lane)
+        {
+            clamped.s[lane] = (std::min)((std::max)(x.s[lane], minval), maxval);
+        }
+        return clamped;
+    }
+    // Vector whose components all hold the smallest value of the scalar type.
+    IN1 min1()
+    {
+        typedef typename scalar_type<IN1>::type SCALAR1;
+        const SCALAR1 lowest = (std::numeric_limits<SCALAR1>::min)();
+        IN1 filled;
+        for(size_t lane = 0; lane != vector_size<IN1>::value; ++lane)
+        {
+            filled.s[lane] = lowest;
+        }
+        return filled;
+    }
+    // Vector whose components all hold the largest value of the scalar type.
+    IN1 max1()
+    {
+        typedef typename scalar_type<IN1>::type SCALAR1;
+        const SCALAR1 highest = (std::numeric_limits<SCALAR1>::max)();
+        IN1 filled;
+        for(size_t lane = 0; lane != vector_size<IN1>::value; ++lane)
+        {
+            filled.s[lane] = highest;
+        }
+        return filled;
+    }
+    // min2()/max2() span the lower half of the IN2 range and min3()/max3()
+    // the upper half of the IN3 range, so any value from the first interval
+    // is below any value from the second.
+    IN2 min2()
+    {
+        const IN2 lowest = (std::numeric_limits<IN2>::min)();
+        return lowest;
+    }
+    IN2 max2()
+    {
+        const IN2 highest = (std::numeric_limits<IN2>::max)();
+        return highest / IN2(2);
+    }
+    IN3 min3()
+    {
+        const IN3 highest = (std::numeric_limits<IN3>::max)();
+        return IN3(1) + (highest / IN3(2));
+    }
+    IN3 max3()
+    {
+        const IN3 highest = (std::numeric_limits<IN3>::max)();
+        return highest;
+    }
+// Host-side reference for mul_hi(): the upper half of the double-width
+// product x * y. Implemented for operand types narrower than 64 bits only.
+template<class IN1, class IN2, class OUT1>
+struct int_func_mul_hi : public binary_func<IN1, IN2, OUT1>
+    // Returns the function name, "mul_hi".
+    std::string str()
+    {
+        return "mul_hi";
+    }
+    // Returns the #include line for <opencl_integer>
+    // (presumably prepended to the generated kernel source - confirm).
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Widens both operands to 64 bits, multiplies, and returns the bits above
+    // the low 8 * sizeof(OUT1) bits of the product.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value
+                && std::is_same<IN2, OUT1>::value,
+            "Types must be the same"
+        );
+        static_assert(
+            !std::is_same<IN1, cl_long>::value && !std::is_same<IN1, cl_ulong>::value,
+            "Operation unimplemented for 64-bit scalars"
+        );
+        // Unsigned operands must be multiplied in cl_ulong: the product of
+        // two large cl_uint values does not fit in a signed cl_long, and
+        // signed overflow is undefined behaviour.
+        typedef typename std::conditional<
+            std::is_unsigned<IN1>::value, cl_ulong, cl_long
+        >::type wide_type;
+        const wide_type xl = static_cast<wide_type>(x);
+        const wide_type yl = static_cast<wide_type>(y);
+        return static_cast<OUT1>((xl * yl) >> (8 * sizeof(OUT1)));
+    }
+// Host-side reference for mad_hi(): mul_hi(x, y) + z.
+template<class IN1, class IN2, class IN3, class OUT1>
+struct int_func_mad_hi : public ternary_func<IN1, IN2, IN3, OUT1>
+    // Returns the function name, "mad_hi".
+    std::string str()
+    {
+        return "mad_hi";
+    }
+    // Returns the #include line for <opencl_integer>
+    // (presumably prepended to the generated kernel source - confirm).
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns the high half of x * y (via int_func_mul_hi) plus z.
+    // The final addition is done in the unsigned counterpart of OUT1 so that
+    // it wraps around; adding in a signed type can overflow, which is
+    // undefined behaviour.
+    OUT1 operator()(const IN1& x, const IN2& y, const IN3& z)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value
+                && std::is_same<IN2, IN3>::value
+                && std::is_same<IN3, OUT1>::value,
+            "Types must be the same"
+        );
+        typedef typename std::make_unsigned<OUT1>::type unsigned_type;
+        const OUT1 high = int_func_mul_hi<IN1, IN2, OUT1>()(x, y);
+        const unsigned_type sum = static_cast<unsigned_type>(
+            static_cast<unsigned_type>(high) + static_cast<unsigned_type>(z)
+        );
+        return static_cast<OUT1>(sum);
+    }
+// This test is implemented only for unsigned integers
+// Host-side reference for mad_sat(): x * y + z, clamped to
+// numeric_limits<OUT1>::max() instead of wrapping.
+template<class IN1, class IN2, class IN3, class OUT1>
+struct int_func_mad_sat : public ternary_func<IN1, IN2, IN3, OUT1>
+    // Returns the function name, "mad_sat".
+    std::string str()
+    {
+        return "mad_sat";
+    }
+    // Returns the #include line for <opencl_integer>
+    // (presumably prepended to the generated kernel source - confirm).
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns x * y + z, or the maximum of OUT1 when either the product or
+    // the sum does not fit.
+    // Both checks run BEFORE the operation they guard. For types narrower
+    // than int (e.g. cl_ushort) the operands are promoted to int, so forming
+    // an out-of-range product first would be a signed overflow, which is
+    // undefined behaviour.
+    OUT1 operator()(const IN1& x, const IN2& y, const IN3& z)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value
+                && std::is_same<IN2, IN3>::value
+                && std::is_same<IN3, OUT1>::value,
+            "Types must be the same"
+        );
+        static_assert(
+            std::is_unsigned<OUT1>::value,
+            "Test operation is not implemented for signed integers"
+        );
+        const OUT1 hi = (std::numeric_limits<OUT1>::max)();
+        // x * y exceeds hi exactly when y > hi / x (for x != 0).
+        if(x != 0 && y > static_cast<OUT1>(hi / x))
+            return hi;
+        const OUT1 product = static_cast<OUT1>(x * y);
+        // product + z exceeds hi exactly when z > hi - product.
+        if(z > static_cast<OUT1>(hi - product))
+            return hi;
+        return static_cast<OUT1>(product + z);
+    }
+// Host-side reference for sub_sat(): x - y, clamped to the representable
+// range of OUT1 instead of wrapping.
+template<class IN1, class IN2, class OUT1>
+struct int_func_sub_sat : public binary_func<IN1, IN2, OUT1>
+    // Returns the function name, "sub_sat".
+    std::string str()
+    {
+        return "sub_sat";
+    }
+    // Returns the #include line for <opencl_integer>
+    // (presumably prepended to the generated kernel source - confirm).
+    std::string headers()
+    {
+        return "#include <opencl_integer>\n";
+    }
+    // Returns x - y, or numeric_limits<OUT1>::min()/max() when the exact
+    // difference does not fit in OUT1.
+    // Overflow is detected BEFORE the subtraction is performed: subtracting
+    // first and inspecting the wrapped result is undefined behaviour for
+    // signed types.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
+            "IN1, IN2 and OUT1 must be the same types"
+        );
+        const OUT1 hi = (std::numeric_limits<OUT1>::max)();
+        const OUT1 lo = (std::numeric_limits<OUT1>::min)();
+        // Positive y: the difference drops below lo exactly when x < lo + y.
+        if(y > OUT1(0) && x < static_cast<OUT1>(lo + y))
+            return lo;
+        // Negative y (never taken for unsigned types): the difference
+        // exceeds hi exactly when x > hi + y.
+        if(y < OUT1(0) && x > static_cast<OUT1>(hi + y))
+            return hi;
+        return static_cast<OUT1>(x - y);
+    }
+// Host-side reference for max(gentype, gentype) on scalar types.
+template<class IN1, class IN2, class OUT1, class Enable = void>
+struct int_func_max : public binary_func<IN1, IN2, OUT1>
+    // Returns the function name, "max".
+    std::string str()
+    {
+        return std::string("max");
+    }
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return std::string("#include <opencl_integer>\n");
+    }
+    // Returns the larger of x and y (x when they compare equal).
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
+            "IN1, IN2 and OUT1 must be the same types"
+        );
+        if(x < y)
+            return y;
+        return x;
+    }
+// Host-side reference for max(gentype, scalar): specialization selected when
+// OUT1 is a vector type; each component of x is compared with the scalar y.
+template<class IN1, class IN2, class OUT1>
+struct int_func_max<IN1, IN2, OUT1, typename std::enable_if<is_vector_type<OUT1>::value>::type> : public binary_func<IN1, IN2, OUT1>
+    // Returns the function name, "max".
+    std::string str()
+    {
+        return std::string("max");
+    }
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return std::string("#include <opencl_integer>\n");
+    }
+    // Vector whose components all hold the smallest value of the scalar type.
+    IN1 min1()
+    {
+        typedef typename scalar_type<IN1>::type SCALAR1;
+        const SCALAR1 lowest = (std::numeric_limits<SCALAR1>::min)();
+        IN1 filled;
+        for(size_t lane = 0; lane != vector_size<IN1>::value; ++lane)
+        {
+            filled.s[lane] = lowest;
+        }
+        return filled;
+    }
+    // Vector whose components all hold the largest value of the scalar type.
+    IN1 max1()
+    {
+        typedef typename scalar_type<IN1>::type SCALAR1;
+        const SCALAR1 highest = (std::numeric_limits<SCALAR1>::max)();
+        IN1 filled;
+        for(size_t lane = 0; lane != vector_size<IN1>::value; ++lane)
+        {
+            filled.s[lane] = highest;
+        }
+        return filled;
+    }
+    // Returns a copy of x in which every component is replaced by the larger
+    // of that component and y.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, OUT1>::value,
+            "IN1 and OUT1 must be the same types"
+        );
+        static_assert(
+            !is_vector_type<IN2>::value,
+            "IN2 must be scalar"
+        );
+        static_assert(
+            std::is_same<typename scalar_type<OUT1>::type, IN2>::value,
+            "IN2 must match with OUT1 and IN1"
+        );
+        IN1 picked = x;
+        for(size_t lane = 0; lane != vector_size<IN1>::value; ++lane)
+        {
+            picked.s[lane] = (x.s[lane] < y) ? y : x.s[lane];
+        }
+        return picked;
+    }
+// Host-side reference for min(gentype, gentype) on scalar types.
+template<class IN1, class IN2, class OUT1, class Enable = void>
+struct int_func_min : public binary_func<IN1, IN2, OUT1>
+    // Returns the function name, "min".
+    std::string str()
+    {
+        return std::string("min");
+    }
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return std::string("#include <opencl_integer>\n");
+    }
+    // Returns the smaller of x and y (x when they compare equal).
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, IN2>::value && std::is_same<IN2, OUT1>::value,
+            "IN1, IN2 and OUT1 must be the same types"
+        );
+        if(y < x)
+            return y;
+        return x;
+    }
+// Host-side reference for min(gentype, scalar): specialization selected when
+// OUT1 is a vector type; each component of x is compared with the scalar y.
+template<class IN1, class IN2, class OUT1>
+struct int_func_min<IN1, IN2, OUT1, typename std::enable_if<is_vector_type<OUT1>::value>::type> : public binary_func<IN1, IN2, OUT1>
+    // Returns the function name, "min".
+    std::string str()
+    {
+        return std::string("min");
+    }
+    // Returns the #include line for <opencl_integer>.
+    std::string headers()
+    {
+        return std::string("#include <opencl_integer>\n");
+    }
+    // Vector whose components all hold the smallest value of the scalar type.
+    IN1 min1()
+    {
+        typedef typename scalar_type<IN1>::type SCALAR1;
+        const SCALAR1 lowest = (std::numeric_limits<SCALAR1>::min)();
+        IN1 filled;
+        for(size_t lane = 0; lane != vector_size<IN1>::value; ++lane)
+        {
+            filled.s[lane] = lowest;
+        }
+        return filled;
+    }
+    // Vector whose components all hold the largest value of the scalar type.
+    IN1 max1()
+    {
+        typedef typename scalar_type<IN1>::type SCALAR1;
+        const SCALAR1 highest = (std::numeric_limits<SCALAR1>::max)();
+        IN1 filled;
+        for(size_t lane = 0; lane != vector_size<IN1>::value; ++lane)
+        {
+            filled.s[lane] = highest;
+        }
+        return filled;
+    }
+    // Returns a copy of x in which every component is replaced by the smaller
+    // of that component and y.
+    OUT1 operator()(const IN1& x, const IN2& y)
+    {
+        static_assert(
+            std::is_same<IN1, OUT1>::value,
+            "IN1 and OUT1 must be the same types"
+        );
+        static_assert(
+            !is_vector_type<IN2>::value,
+            "IN2 must be scalar"
+        );
+        static_assert(
+            std::is_same<typename scalar_type<OUT1>::type, IN2>::value,
+            "IN2 must match with OUT1 and IN1"
+        );
+        IN1 picked = x;
+        for(size_t lane = 0; lane != vector_size<IN1>::value; ++lane)
+        {
+            picked.s[lane] = (y < x.s[lane]) ? y : x.s[lane];
+        }
+        return picked;
+    }
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS; // overall status; inspected once, after every case below has been invoked
+    int last_error = CL_SUCCESS; // NOTE(review): never read directly here; presumably written by the TEST_*_FUNC_MACRO expansions - confirm
+    // ugentype abs(gentype x); (signed inputs map to the unsigned type of the same width)
+    TEST_UNARY_FUNC_MACRO((int_func_abs<cl_int, cl_uint>()))
+    TEST_UNARY_FUNC_MACRO((int_func_abs<cl_uint, cl_uint>()))
+    TEST_UNARY_FUNC_MACRO((int_func_abs<cl_long, cl_ulong>()))
+    TEST_UNARY_FUNC_MACRO((int_func_abs<cl_ulong, cl_ulong>()))
+    // ugentype abs_diff(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((int_func_abs_diff<cl_int, cl_int, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_abs_diff<cl_uint, cl_uint, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_abs_diff<cl_long, cl_long, cl_ulong>()))
+    TEST_BINARY_FUNC_MACRO((int_func_abs_diff<cl_ulong, cl_ulong, cl_ulong>()))
+    // gentype add_sat(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((int_func_add_sat<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_add_sat<cl_uint, cl_uint, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_add_sat<cl_long, cl_long, cl_long>()))
+    TEST_BINARY_FUNC_MACRO((int_func_add_sat<cl_ulong, cl_ulong, cl_ulong>()))
+    // gentype hadd(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((int_func_hadd<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_hadd<cl_uint, cl_uint, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_hadd<cl_long, cl_long, cl_long>()))
+    TEST_BINARY_FUNC_MACRO((int_func_hadd<cl_ulong, cl_ulong, cl_ulong>()))
+    // gentype rhadd(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((int_func_rhadd<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_rhadd<cl_uint, cl_uint, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_rhadd<cl_long, cl_long, cl_long>()))
+    TEST_BINARY_FUNC_MACRO((int_func_rhadd<cl_ulong, cl_ulong, cl_ulong>()))
+    // gentype clamp(gentype x, gentype minval, gentype maxval);
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_int, cl_int, cl_int, cl_int>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_uint, cl_uint, cl_uint, cl_uint>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_long, cl_long, cl_long, cl_long>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_ulong, cl_ulong, cl_ulong, cl_ulong>()))
+    // gentype clamp(gentype x, scalar minval, scalar maxval); (vector x, scalar bounds)
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_int2, cl_int, cl_int, cl_int2>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_uint4, cl_uint, cl_uint, cl_uint4>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_long8, cl_long, cl_long, cl_long8>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_clamp<cl_ulong16, cl_ulong, cl_ulong, cl_ulong16>()))
+    // gentype mad_hi(gentype a, gentype b, gentype c); (16- and 32-bit types only)
+    TEST_TERNARY_FUNC_MACRO((int_func_mad_hi<cl_short, cl_short, cl_short, cl_short>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_mad_hi<cl_ushort, cl_ushort, cl_ushort, cl_ushort>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_mad_hi<cl_int, cl_int, cl_int, cl_int>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_mad_hi<cl_uint, cl_uint, cl_uint, cl_uint>()))
+    // gentype mad_sat(gentype a, gentype b, gentype c); (unsigned types only)
+    TEST_TERNARY_FUNC_MACRO((int_func_mad_sat<cl_ushort, cl_ushort, cl_ushort, cl_ushort>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_mad_sat<cl_uint, cl_uint, cl_uint, cl_uint>()))
+    TEST_TERNARY_FUNC_MACRO((int_func_mad_sat<cl_ulong, cl_ulong, cl_ulong, cl_ulong>()))
+    // gentype max(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_uint, cl_uint, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_long, cl_long, cl_long>()))
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_ulong, cl_ulong, cl_ulong>()))
+    // gentype max(gentype x, scalar y);
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_int2, cl_int, cl_int2>()))
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_uint4, cl_uint, cl_uint4>()))
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_long8, cl_long, cl_long8>()))
+    TEST_BINARY_FUNC_MACRO((int_func_max<cl_ulong16, cl_ulong, cl_ulong16>()))
+    // gentype min(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_uint, cl_uint, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_long, cl_long, cl_long>()))
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_ulong, cl_ulong, cl_ulong>()))
+    // gentype min(gentype x, scalar y);
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_int2, cl_int, cl_int2>()))
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_uint4, cl_uint, cl_uint4>()))
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_long8, cl_long, cl_long8>()))
+    TEST_BINARY_FUNC_MACRO((int_func_min<cl_ulong16, cl_ulong, cl_ulong16>()))
+    // gentype mul_hi(gentype x, gentype y); (16- and 32-bit types only)
+    TEST_BINARY_FUNC_MACRO((int_func_mul_hi<cl_short, cl_short, cl_short>()))
+    TEST_BINARY_FUNC_MACRO((int_func_mul_hi<cl_ushort, cl_ushort, cl_ushort>())) 
+    TEST_BINARY_FUNC_MACRO((int_func_mul_hi<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_mul_hi<cl_uint, cl_uint, cl_uint>()))
+    // gentype sub_sat(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((int_func_sub_sat<cl_int, cl_int, cl_int>()))
+    TEST_BINARY_FUNC_MACRO((int_func_sub_sat<cl_uint, cl_uint, cl_uint>()))
+    TEST_BINARY_FUNC_MACRO((int_func_sub_sat<cl_long, cl_long, cl_long>()))
+    TEST_BINARY_FUNC_MACRO((int_func_sub_sat<cl_ulong, cl_ulong, cl_ulong>()))
+    if(error != CL_SUCCESS) // any non-success status is reported to the caller as -1
+    {
+        return -1;
+    }    
+    return error;
diff --git a/test_conformance/clcpp/math_funcs/CMakeLists.txt b/test_conformance/clcpp/math_funcs/CMakeLists.txt
new file mode 100644
index 0000000..c3b56c1
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/math_funcs/common.hpp b/test_conformance/clcpp/math_funcs/common.hpp
new file mode 100644
index 0000000..3224905
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/common.hpp
@@ -0,0 +1,347 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <cmath>
+#include <limits>
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+#include "reference.hpp"
+    #define MATH_FUNCS_CLASS_NAME(x, y) x ## _func_ ## y        
+struct MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) : public unary_func<cl_float, cl_float> /* class name expands to GROUP_NAME_func_NAME */ \
+{ \
+    MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME)(bool is_embedded) : m_is_embedded(is_embedded)  \
+    { \
+    \
+    } \
+    \
+    std::string str() /* stringified OCL_FUNC macro argument */ \
+    { \
+        return #OCL_FUNC; \
+    } \
+    \
+    std::string headers()  \
+    { \
+        return "#include <opencl_math>\n"; \
+    } \
+    /* Reference value type is cl_double: x is widened to cl_double before HOST_FUNC is applied */ \
+    cl_double operator()(const cl_float& x)  \
+    { \
+        return (HOST_FUNC)(static_cast<cl_double>(x)); \
+    } \
+    \
+    cl_float min1() \
+    { \
+        return MIN1; \
+    } \
+    \
+    cl_float max1() \
+    { \
+        return MAX1; \
+    } \
+    \
+    std::vector<cl_float> in1_special_cases() /* signed zeros, +-1, +-2, +-infinity and a quiet NaN */ \
+    { \
+        return {  \
+            cl_float(0.0f), \
+            cl_float(-0.0f), \
+            cl_float(1.0f), \
+            cl_float(-1.0f), \
+            cl_float(2.0f), \
+            cl_float(-2.0f), \
+            std::numeric_limits<cl_float>::infinity(), \
+            -std::numeric_limits<cl_float>::infinity(), \
+            std::numeric_limits<cl_float>::quiet_NaN() \
+        }; \
+    } \
+    \
+    bool use_ulp() \
+    { \
+        return USE_ULP; \
+    } \
+    \
+    template<class T> \
+    typename make_vector_type<cl_double, vector_size<T>::value>::type \
+    delta(const cl_float& in1, const T& expected) /* combines DELTA with expected via detail::multiply; in1 is unused */ \
+    { \
+        typedef  \
+            typename make_vector_type<cl_double, vector_size<T>::value>::type \
+            delta_vector_type; \
+        (void) in1; \
+        auto e = detail::make_value<delta_vector_type>(DELTA); \
+        return detail::multiply<delta_vector_type>(e, expected); \
+    } \
+    \
+    float ulp() /* ULP_EMBEDDED when constructed with is_embedded == true, otherwise ULP */ \
+    { \
+        if(m_is_embedded) \
+        { \
+            return ULP_EMBEDDED; \
+        } \
+        return ULP; \
+    } \
+private: \
+    bool m_is_embedded; \
+struct MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) : public binary_func<cl_float, cl_float, cl_float> /* class name expands to GROUP_NAME_func_NAME */ \
+{ \
+    MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME)(bool is_embedded) : m_is_embedded(is_embedded)  \
+    { \
+    \
+    } \
+    \
+    std::string str() /* stringified OCL_NAME macro argument */ \
+    { \
+        return #OCL_NAME; \
+    } \
+    \
+    std::string headers()  \
+    { \
+        return "#include <opencl_math>\n"; \
+    } \
+    \
+    cl_float operator()(const cl_float& x, const cl_float& y) /* NOTE(review): evaluated on cl_float and returns cl_float, whereas the unary and ternary variants compute in cl_double - confirm intended */  \
+    { \
+        return (HOST_FUNC)(x, y); \
+    } \
+    \
+    cl_float min1() \
+    { \
+        return MIN1; \
+    } \
+    \
+    cl_float max1() \
+    { \
+        return MAX1; \
+    } \
+    \
+    cl_float min2() \
+    { \
+        return MIN2; \
+    } \
+    \
+    cl_float max2() \
+    { \
+        return MAX2; \
+    } \
+    \
+    std::vector<cl_float> in1_special_cases() /* signed zeros, +-1, +-2, +-infinity and a quiet NaN */ \
+    { \
+        return {  \
+            cl_float(0.0f), \
+            cl_float(-0.0f), \
+            cl_float(1.0f), \
+            cl_float(-1.0f), \
+            cl_float(2.0f), \
+            cl_float(-2.0f), \
+            std::numeric_limits<cl_float>::infinity(), \
+            -std::numeric_limits<cl_float>::infinity(), \
+            std::numeric_limits<cl_float>::quiet_NaN() \
+        }; \
+    } \
+    \
+    std::vector<cl_float> in2_special_cases() /* same value list as in1_special_cases() */ \
+    { \
+        return {  \
+            cl_float(0.0f), \
+            cl_float(-0.0f), \
+            cl_float(1.0f), \
+            cl_float(-1.0f), \
+            cl_float(2.0f), \
+            cl_float(-2.0f), \
+            std::numeric_limits<cl_float>::infinity(), \
+            -std::numeric_limits<cl_float>::infinity(), \
+            std::numeric_limits<cl_float>::quiet_NaN() \
+        }; \
+    } \
+    \
+    template<class T> \
+    typename make_vector_type<cl_double, vector_size<T>::value>::type \
+    delta(const cl_float& in1, const cl_float& in2, const T& expected) /* combines DELTA with expected via detail::multiply; in1 and in2 are unused */ \
+    { \
+        typedef \
+            typename make_vector_type<cl_double, vector_size<T>::value>::type \
+            delta_vector_type; \
+        (void) in1; \
+        (void) in2; \
+        auto e = detail::make_value<delta_vector_type>(DELTA); \
+        return detail::multiply<delta_vector_type>(e, expected); \
+    } \
+    \
+    bool use_ulp() \
+    { \
+        return USE_ULP; \
+    } \
+    \
+    float ulp() /* ULP_EMBEDDED when constructed with is_embedded == true, otherwise ULP */ \
+    { \
+        if(m_is_embedded) \
+        { \
+            return ULP_EMBEDDED; \
+        } \
+        return ULP; \
+    } \
+private: \
+    bool m_is_embedded; \
+struct MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) : public ternary_func<cl_float, cl_float, cl_float, cl_float> /* class name expands to GROUP_NAME_func_NAME */ \
+{ \
+    MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME)(bool is_embedded) : m_is_embedded(is_embedded)  \
+    { \
+    \
+    } \
+    \
+    std::string str() /* stringified OCL_NAME macro argument */ \
+    { \
+        return #OCL_NAME; \
+    } \
+    \
+    std::string headers() \
+    { \
+        return "#include <opencl_math>\n"; \
+    } \
+    \
+    cl_double operator()(const cl_float& x, const cl_float& y, const cl_float& z) /* all three inputs are widened to cl_double before HOST_FUNC is applied */  \
+    { \
+        return (HOST_FUNC)(static_cast<cl_double>(x), static_cast<cl_double>(y), static_cast<cl_double>(z)); \
+    } \
+    \
+    cl_float min1() \
+    { \
+        return MIN1; \
+    } \
+    \
+    cl_float max1() \
+    { \
+        return MAX1; \
+    } \
+    \
+    cl_float min2() \
+    { \
+        return MIN2; \
+    } \
+    \
+    cl_float max2() \
+    { \
+        return MAX2; \
+    } \
+    \
+    cl_float min3() \
+    { \
+        return MIN3; \
+    } \
+    \
+    cl_float max3() \
+    { \
+        return MAX3; \
+    } \
+    \
+    std::vector<cl_float> in1_special_cases() /* signed zeros, +-1, +-2, +-infinity and a quiet NaN */ \
+    { \
+        return {  \
+            cl_float(0.0f), \
+            cl_float(-0.0f), \
+            cl_float(1.0f), \
+            cl_float(-1.0f), \
+            cl_float(2.0f), \
+            cl_float(-2.0f), \
+            std::numeric_limits<cl_float>::infinity(), \
+            -std::numeric_limits<cl_float>::infinity(), \
+            std::numeric_limits<cl_float>::quiet_NaN() \
+        }; \
+    } \
+    \
+    std::vector<cl_float> in2_special_cases() /* same value list as in1_special_cases() */ \
+    { \
+        return {  \
+            cl_float(0.0f), \
+            cl_float(-0.0f), \
+            cl_float(1.0f), \
+            cl_float(-1.0f), \
+            cl_float(2.0f), \
+            cl_float(-2.0f), \
+            std::numeric_limits<cl_float>::infinity(), \
+            -std::numeric_limits<cl_float>::infinity(), \
+            std::numeric_limits<cl_float>::quiet_NaN() \
+        }; \
+    } \
+    \
+    std::vector<cl_float> in3_special_cases() /* same value list as in1_special_cases() */ \
+    { \
+        return {  \
+            cl_float(0.0f), \
+            cl_float(-0.0f), \
+            cl_float(1.0f), \
+            cl_float(-1.0f), \
+            cl_float(2.0f), \
+            cl_float(-2.0f), \
+            std::numeric_limits<cl_float>::infinity(), \
+            -std::numeric_limits<cl_float>::infinity(), \
+            std::numeric_limits<cl_float>::quiet_NaN() \
+        }; \
+    } \
+    \
+    template<class T> \
+    typename make_vector_type<cl_double, vector_size<T>::value>::type \
+    delta(const cl_float& in1, const cl_float& in2, const cl_float& in3, const T& expected) /* combines DELTA with expected via detail::multiply; in1, in2 and in3 are unused */ \
+    { \
+        typedef \
+            typename make_vector_type<cl_double, vector_size<T>::value>::type \
+            delta_vector_type; \
+        (void) in1; \
+        (void) in2; \
+        (void) in3; \
+        auto e = detail::make_value<delta_vector_type>(DELTA); \
+        return detail::multiply<delta_vector_type>(e, expected); \
+    } \
+    \
+    bool use_ulp() \
+    { \
+        return USE_ULP; \
+    } \
+    \
+    float ulp() /* ULP_EMBEDDED when constructed with is_embedded == true, otherwise ULP */ \
+    { \
+        if(m_is_embedded) \
+        { \
+            return ULP_EMBEDDED; \
+        } \
+        return ULP; \
+    } \
+private: \
+    bool m_is_embedded; \
diff --git a/test_conformance/clcpp/math_funcs/comparison_funcs.hpp b/test_conformance/clcpp/math_funcs/comparison_funcs.hpp
new file mode 100644
index 0000000..0bd6ff9
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/comparison_funcs.hpp
@@ -0,0 +1,59 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include <cmath>
+#include "common.hpp"
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
+MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, fdim, std::fdim, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, fmax, std::fmax, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, fmin, std::fmin, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, maxmag, reference::maxmag, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, minmag, reference::minmag, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
+// comparison functions
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS; // overall status; inspected once, after every case below has been invoked
+    int last_error = CL_SUCCESS; // receives the clGetDeviceInfo status below
+    // Check for EMBEDDED_PROFILE: the flag is passed to every functor constructed below
+    bool is_embedded_profile = false;
+    char profile[128];
+    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
+    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
+    if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) // NOTE(review): std::strcmp needs <cstring>, which this header does not include directly - presumably pulled in transitively, confirm
+        is_embedded_profile = true;
+    TEST_BINARY_FUNC_MACRO((comparison_func_fdim(is_embedded_profile)))
+    TEST_BINARY_FUNC_MACRO((comparison_func_fmax(is_embedded_profile)))
+    TEST_BINARY_FUNC_MACRO((comparison_func_fmin(is_embedded_profile)))
+    TEST_BINARY_FUNC_MACRO((comparison_func_maxmag(is_embedded_profile)))
+    TEST_BINARY_FUNC_MACRO((comparison_func_minmag(is_embedded_profile)))
+    if(error != CL_SUCCESS) // any non-success status is reported to the caller as -1
+    {
+        return -1;
+    }    
+    return error;
diff --git a/test_conformance/clcpp/math_funcs/exponential_funcs.hpp b/test_conformance/clcpp/math_funcs/exponential_funcs.hpp
new file mode 100644
index 0000000..82a8247
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/exponential_funcs.hpp
@@ -0,0 +1,139 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include <cmath>
+#include "common.hpp"
+// Arguments: group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, exp, std::exp, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f) // 3 ulp, 4 ulp on embedded profile; input bounds -1000..1000 (same for every line below)
+MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, expm1, std::expm1, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, exp2, std::exp2, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, exp10, reference::exp10, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f) // host reference taken from reference:: instead of std::
+struct exponential_func_ldexp : public binary_func<cl_float, cl_int, cl_float> // Test descriptor for ldexp(x, k): cl_float and cl_int inputs, cl_float output
+    exponential_func_ldexp(bool is_embedded) : m_is_embedded(is_embedded) // is_embedded: whether the device reported EMBEDDED_PROFILE
+    {
+    }
+    std::string str() // name of the function under test
+    {
+        return "ldexp";
+    }
+    std::string headers() // #include line(s) the kernel source needs for ldexp
+    {
+        return "#include <opencl_math>\n";
+    }
+    /* Reference value type is cl_double */
+    cl_double operator()(const cl_float& x, const cl_int& y) // host reference: std::ldexp applied to x widened to double
+    {
+        return (std::ldexp)(static_cast<cl_double>(x), y);
+    }
+    cl_float min1() // presumably the lower bound used when generating x - confirm in common.hpp
+    {
+        return -1000.0f;
+    }
+    cl_float max1() // upper bound counterpart of min1()
+    {
+        return 1000.0f;
+    }
+    cl_int min2() // lower bound for the exponent argument
+    {
+        return -8;
+    }
+    cl_int max2() // upper bound for the exponent argument
+    {
+        return 8;
+    }
+    std::vector<cl_float> in1_special_cases() // explicit x values: signed zeros, +/-1, +/-2, +/-infinity and a quiet NaN
+    {
+        return { 
+            cl_float(0.0f),
+            cl_float(-0.0f),
+            cl_float(1.0f),
+            cl_float(-1.0f),
+            cl_float(2.0f),
+            cl_float(-2.0f),
+            std::numeric_limits<cl_float>::infinity(),
+            -std::numeric_limits<cl_float>::infinity(),
+            std::numeric_limits<cl_float>::quiet_NaN()
+        };
+    }
+    bool use_ulp() // results are judged by ULP error
+    {
+        return true;
+    }
+    float ulp() // allowed ULP error: 0 for both profiles (the embedded branch is separate but currently identical)
+    {
+        if(m_is_embedded)
+        {
+            return 0.0f;
+        }
+        return 0.0f;
+    }
+    bool m_is_embedded; // value passed to the constructor
+// exponential functions: runs the exp, expm1, exp2, exp10 and ldexp tests on the given device
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS; // never assigned directly here; presumably updated inside the TEST_*_FUNC_MACRO invocations - TODO confirm
+    int last_error = CL_SUCCESS; // status of the clGetDeviceInfo call below
+    // Check for EMBEDDED_PROFILE
+    bool is_embedded_profile = false;
+    char profile[128]; // receives the CL_DEVICE_PROFILE string
+    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
+    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
+    if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
+        is_embedded_profile = true;
+    // auto exp(gentype x);
+    // auto expm1(gentype x);
+    // auto exp2(gentype x);
+    // auto exp10(gentype x);
+    TEST_UNARY_FUNC_MACRO((exponential_func_exp(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((exponential_func_expm1(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((exponential_func_exp2(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((exponential_func_exp10(is_embedded_profile)))
+    // auto ldexp(gentype x, intn k);
+    TEST_BINARY_FUNC_MACRO((exponential_func_ldexp(is_embedded_profile)))
+    if(error != CL_SUCCESS) // any non-success code is reported to the caller as -1
+    {
+        return -1;
+    }    
+    return error; // equals CL_SUCCESS when reached
diff --git a/test_conformance/clcpp/math_funcs/floating_point_funcs.hpp b/test_conformance/clcpp/math_funcs/floating_point_funcs.hpp
new file mode 100644
index 0000000..63b4c23
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/floating_point_funcs.hpp
@@ -0,0 +1,733 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include <cmath>
+#include <cstring>
+#include "common.hpp"
+// -------------- UNARY FUNCTIONS
+// gentype ceil(gentype x);
+// gentype floor(gentype x);
+// gentype rint(gentype x);
+// gentype round(gentype x);
+// gentype trunc(gentype x);
+// Arguments: group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+MATH_FUNCS_DEFINE_UNARY_FUNC(fp, ceil, std::ceil, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f) // 0 ulp on both profiles; input bounds -1000..1000 (same for every line below)
+MATH_FUNCS_DEFINE_UNARY_FUNC(fp, floor, std::floor, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(fp, rint, std::rint, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(fp, round, std::round, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(fp, trunc, std::trunc, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+// floatn nan(uintn nancode);
+struct fp_func_nan : public unary_func<cl_uint, cl_float> // Test descriptor for nan(nancode): cl_uint input, cl_float output
+    std::string str() // name of the function under test
+    {
+        return "nan";
+    }
+    std::string headers() // #include line(s) the kernel source needs for nan
+    {
+        return "#include <opencl_math>\n";
+    }
+    // Host reference: the quiet-NaN bit pattern 0x7fc00000 OR-ed with nancode.
+    cl_float operator()(const cl_uint& x)
+    {
+        static_assert(sizeof(cl_float) == sizeof(cl_uint),
+                      "cl_float and cl_uint must have the same size");
+        const cl_uint bits = x | 0x7fc00000U;
+        // Copy the bytes rather than dereferencing a reinterpret_cast'ed pointer:
+        // reading a cl_uint object through a cl_float lvalue breaks the
+        // strict-aliasing rule and is undefined behaviour.
+        cl_float rf;
+        std::memcpy(&rf, &bits, sizeof(rf));
+        return rf;
+    }
+    cl_uint min1() // smallest nancode bound
+    {
+        return 0;
+    }
+    cl_uint max1() // largest nancode bound
+    {
+        return 100;
+    }
+    std::vector<cl_uint> in1_special_cases() // nancodes listed explicitly: 0 and 1
+    {
+        return {
+            0, 1
+        };
+    }
+// gentype fract(gentype x, gentype* iptr);
+// Function fract() returns an additional value via pointer (2nd argument). In order to test
+// that it is correct, the output buffer type is cl_float2. In the first component we store what
+// fract() function returns, and in the 2nd component we store what is returned via its
+// 2nd argument (gentype* iptr).
+struct fp_func_fract : public unary_func<cl_float, cl_float2> // Test descriptor for fract(x, iptr): cl_float input, cl_float2 output holding both results
+    fp_func_fract(bool is_embedded) : m_is_embedded(is_embedded) // is_embedded: whether the device reported EMBEDDED_PROFILE
+    {
+    }
+    std::string str() // name of the function under test
+    {
+        return "fract";
+    }
+    std::string headers() // #include line(s) the kernel source needs for fract
+    {
+        return "#include <opencl_math>\n";
+    }
+    cl_double2 operator()(const cl_float& x) // host reference: reference::fract applied to x widened to double
+    {
+        return reference::fract(static_cast<cl_double>(x));
+    }
+    cl_float min1() // presumably the lower bound used when generating x - confirm in common.hpp
+    {
+        return -1000.0f;
+    }
+    cl_float max1() // upper bound counterpart of min1()
+    {
+        return 1000.0f;
+    }
+    std::vector<cl_float> in1_special_cases() // explicit x values: signed zeros, +/-1, +/-2, +/-infinity and a quiet NaN
+    {
+        return {
+            cl_float(0.0f),
+            cl_float(-0.0f),
+            cl_float(1.0f),
+            cl_float(-1.0f),
+            cl_float(2.0f),
+            cl_float(-2.0f),
+            std::numeric_limits<cl_float>::infinity(),
+            -std::numeric_limits<cl_float>::infinity(),
+            std::numeric_limits<cl_float>::quiet_NaN()
+        };
+    }
+    bool use_ulp() // results are judged by ULP error
+    {
+        return true;
+    }
+    float ulp() // allowed ULP error: 0 for both profiles (the embedded branch is separate but currently identical)
+    {
+        if(m_is_embedded)
+        {
+            return 0.0f;
+        }
+        return 0.0f;
+    }
+    bool m_is_embedded; // value passed to the constructor
+// We need to specialize generate_kernel_unary<>() function template for fp_func_fract.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) // development builds that use OpenCL C kernels
+template <>
+std::string generate_kernel_unary<fp_func_fract, cl_float, cl_float2>(fp_func_fract func) // OpenCL C source of the fract test kernel
+    return
+        "__kernel void test_fract(global float *input, global float2 *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    float itpr = 0;\n"
+        "    result.x = fract(input[gid], &itpr);\n" // .x: value returned by fract()
+        "    result.y = itpr;\n" // .y: value written through the pointer argument
+        "    output[gid] = result;\n"
+        "}\n";
+template <>
+std::string generate_kernel_unary<fp_func_fract, cl_float, cl_float2>(fp_func_fract func) // OpenCL C++ source: func.defs() and func.headers() are prepended, buffers are global_ptr<>
+    return
+        "" + func.defs() +
+        "" + func.headers() +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_fract(global_ptr<float[]> input, global_ptr<float2[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    float itpr = 0;\n"
+        "    result.x = fract(input[gid], &itpr);\n" // .x: value returned by fract()
+        "    result.y = itpr;\n" // .y: value written through the pointer argument
+        "    output[gid] = result;\n"
+        "}\n";
+// gentype modf(gentype x, gentype* iptr);
+// Function modf() returns an additional value via pointer (2nd argument). In order to test
+// that it is correct, the output buffer type is cl_float2. In the first component we store what
+// modf() function returns, and in the 2nd component we store what is returned via its
+// 2nd argument (gentype* iptr).
+struct fp_func_modf : public unary_func<cl_float, cl_float2> // Test descriptor for modf(x, iptr): cl_float input, cl_float2 output holding both results
+    fp_func_modf(bool is_embedded) : m_is_embedded(is_embedded) // is_embedded: whether the device reported EMBEDDED_PROFILE
+    {
+    }
+    std::string str() // name of the function under test
+    {
+        return "modf";
+    }
+    std::string headers() // #include line(s) the kernel source needs for modf
+    {
+        return "#include <opencl_math>\n";
+    }
+    cl_double2 operator()(const cl_float& x) // host reference computed with std::modf in double precision
+    {
+        cl_double2 r;
+        r.s[0] = (std::modf)(static_cast<cl_double>(x), &(r.s[1])); // s[0]: fractional part (return value); s[1]: integral part (written via pointer)
+        return r;
+    }
+    cl_float min1() // presumably the lower bound used when generating x - confirm in common.hpp
+    {
+        return -1000.0f;
+    }
+    cl_float max1() // upper bound counterpart of min1()
+    {
+        return 1000.0f;
+    }
+    std::vector<cl_float> in1_special_cases() // explicit x values: signed zeros, +/-1, +/-2, +/-infinity and a quiet NaN
+    {
+        return {
+            cl_float(0.0f),
+            cl_float(-0.0f),
+            cl_float(1.0f),
+            cl_float(-1.0f),
+            cl_float(2.0f),
+            cl_float(-2.0f),
+            std::numeric_limits<cl_float>::infinity(),
+            -std::numeric_limits<cl_float>::infinity(),
+            std::numeric_limits<cl_float>::quiet_NaN()
+        };
+    }
+    bool use_ulp() // results are judged by ULP error
+    {
+        return true;
+    }
+    float ulp() // allowed ULP error: 0 for both profiles (the embedded branch is separate but currently identical)
+    {
+        if(m_is_embedded)
+        {
+            return 0.0f;
+        }
+        return 0.0f;
+    }
+    bool m_is_embedded; // value passed to the constructor
+// We need to specialize generate_kernel_unary<>() function template for fp_func_modf.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) // development builds that use OpenCL C kernels
+template <>
+std::string generate_kernel_unary<fp_func_modf, cl_float, cl_float2>(fp_func_modf func) // OpenCL C source of the modf test kernel
+    return
+        "__kernel void test_modf(global float *input, global float2 *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    float itpr = 0;\n"
+        "    result.x = modf(input[gid], &itpr);\n" // .x: value returned by modf()
+        "    result.y = itpr;\n" // .y: value written through the pointer argument
+        "    output[gid] = result;\n"
+        "}\n";
+template <>
+std::string generate_kernel_unary<fp_func_modf, cl_float, cl_float2>(fp_func_modf func) // OpenCL C++ source: func.defs() and func.headers() are prepended, buffers are global_ptr<>
+    return
+        "" + func.defs() +
+        "" + func.headers() +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_modf(global_ptr<float[]> input, global_ptr<float2[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    float itpr = 0;\n"
+        "    result.x = modf(input[gid], &itpr);\n" // .x: value returned by modf()
+        "    result.y = itpr;\n" // .y: value written through the pointer argument
+        "    output[gid] = result;\n"
+        "}\n";
+// gentype frexp(gentype x, intn* exp);
+// Function frexp() returns an additional value via pointer (2nd argument). In order to test
+// that it is correct, the output buffer type is cl_float2. In the first component we store what
+// frexp() function returns, and in the 2nd component we store what is returned via its
+// 2nd argument (intn* exp), converted to float.
+struct fp_func_frexp : public unary_func<cl_float, cl_float2> // Test descriptor for frexp(x, exp): cl_float input, cl_float2 output holding both results
+    fp_func_frexp(bool is_embedded) : m_is_embedded(is_embedded) // is_embedded: whether the device reported EMBEDDED_PROFILE
+    {
+    }
+    std::string str() // name of the function under test
+    {
+        return "frexp";
+    }
+    std::string headers() // #include line(s) the kernel source needs for frexp
+    {
+        return "#include <opencl_math>\n";
+    }
+    cl_double2 operator()(const cl_float& x) // host reference computed with std::frexp in double precision
+    {
+        cl_double2 r;
+        cl_int e;
+        r.s[0] = (std::frexp)(static_cast<cl_double>(x), &e); // s[0]: value returned by std::frexp
+        r.s[1] = static_cast<cl_float>(e); // s[1]: exponent, stored as a floating-point value next to s[0]
+        return r;
+    }
+    cl_float min1() // presumably the lower bound used when generating x - confirm in common.hpp
+    {
+        return -1000.0f;
+    }
+    cl_float max1() // upper bound counterpart of min1()
+    {
+        return 1000.0f;
+    }
+    std::vector<cl_float> in1_special_cases() // explicit x values: signed zeros, +/-1, +/-2, +/-infinity and a quiet NaN
+    {
+        return {
+            cl_float(0.0f),
+            cl_float(-0.0f),
+            cl_float(1.0f),
+            cl_float(-1.0f),
+            cl_float(2.0f),
+            cl_float(-2.0f),
+            std::numeric_limits<cl_float>::infinity(),
+            -std::numeric_limits<cl_float>::infinity(),
+            std::numeric_limits<cl_float>::quiet_NaN()
+        };
+    }
+    bool use_ulp() // results are judged by ULP error
+    {
+        return true;
+    }
+    float ulp() // allowed ULP error: 0 for both profiles (the embedded branch is separate but currently identical)
+    {
+        if(m_is_embedded)
+        {
+            return 0.0f;
+        }
+        return 0.0f;
+    }
+    bool m_is_embedded; // value passed to the constructor
+// We need to specialize generate_kernel_unary<>() function template for fp_func_frexp.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) // development builds that use OpenCL C kernels
+template <>
+std::string generate_kernel_unary<fp_func_frexp, cl_float, cl_float2>(fp_func_frexp func) // OpenCL C source of the frexp test kernel
+    return
+        "__kernel void test_frexp(global float *input, global float2 *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    int itpr = 0;\n"
+        "    result.x = frexp(input[gid], &itpr);\n" // .x: value returned by frexp()
+        "    result.y = itpr;\n" // .y: int exponent written through the pointer, converted to float by the assignment
+        "    output[gid] = result;\n"
+        "}\n";
+template <>
+std::string generate_kernel_unary<fp_func_frexp, cl_float, cl_float2>(fp_func_frexp func) // OpenCL C++ source: func.defs() and func.headers() are prepended, buffers are global_ptr<>
+    return
+        "" + func.defs() +
+        "" + func.headers() +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_frexp(global_ptr<float[]> input, global_ptr<float2[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    int itpr = 0;\n"
+        "    result.x = frexp(input[gid], &itpr);\n" // .x: value returned by frexp()
+        "    result.y = itpr;\n" // .y: int exponent written through the pointer, converted to float by the assignment
+        "    output[gid] = result;\n"
+        "}\n";
+// -------------- BINARY FUNCTIONS
+// gentype copysign(gentype x, gentype y);
+// gentype fmod(gentype x, gentype y);
+// gentype remainder(gentype x, gentype y);
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
+MATH_FUNCS_DEFINE_BINARY_FUNC(fp, copysign, std::copysign, true, 0.0f, 0.0f, 0.001f, -100.0f, 100.0f, -10.0f, 10.0f) // 0 ulp on both profiles; x bounds -100..100, y bounds -10..10 (same for every line below)
+MATH_FUNCS_DEFINE_BINARY_FUNC(fp, fmod, std::fmod, true, 0.0f, 0.0f, 0.001f, -100.0f, 100.0f, -10.0f, 10.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC(fp, remainder, std::remainder, true, 0.0f, 0.0f, 0.001f, -100.0f, 100.0f, -10.0f, 10.0f) // ulp_for_embedded and max_delta were transposed here (0.001f, 0.0f); now matches copysign/fmod
+// In case of function float nextafter(float, float) reference function must
+// operate on floats and return float.
+struct fp_func_nextafter : public binary_func<cl_float, cl_float, cl_float> // Test descriptor for nextafter(x, y): two cl_float inputs, cl_float output
+    fp_func_nextafter(bool is_embedded) : m_is_embedded(is_embedded) // is_embedded: whether the device reported EMBEDDED_PROFILE
+    {
+    }
+    std::string str() // name of the function under test
+    {
+        return "nextafter";
+    }
+    std::string headers() // #include line(s) the kernel source needs for nextafter
+    {
+        return "#include <opencl_math>\n";
+    }
+    /* In this case reference value type MUST BE cl_float */
+    cl_float operator()(const cl_float& x, const cl_float& y) // host reference: std::nextafter on the float arguments, no widening to double
+    {
+        return (std::nextafter)(x, y);
+    }
+    cl_float min1() // presumably the lower bound used when generating x - confirm in common.hpp
+    {
+        return -1000.0f;
+    }
+    cl_float max1() // upper bound counterpart of min1()
+    {
+        return 500.0f;
+    }
+    cl_float min2() // lower bound for y; 501 lies above max1(), so the y bounds sit entirely above the x bounds
+    {
+        return 501.0f;
+    }
+    cl_float max2() // upper bound for y
+    {
+        return 1000.0f;
+    }
+    std::vector<cl_float> in1_special_cases() // explicit x values: signed zeros, +/-1, +/-2, +/-infinity and a quiet NaN
+    {
+        return {
+            cl_float(0.0f),
+            cl_float(-0.0f),
+            cl_float(1.0f),
+            cl_float(-1.0f),
+            cl_float(2.0f),
+            cl_float(-2.0f),
+            std::numeric_limits<cl_float>::infinity(),
+            -std::numeric_limits<cl_float>::infinity(),
+            std::numeric_limits<cl_float>::quiet_NaN()
+        };
+    }
+    std::vector<cl_float> in2_special_cases() // explicit y values: the same list as for x
+    {
+        return {
+            cl_float(0.0f),
+            cl_float(-0.0f),
+            cl_float(1.0f),
+            cl_float(-1.0f),
+            cl_float(2.0f),
+            cl_float(-2.0f),
+            std::numeric_limits<cl_float>::infinity(),
+            -std::numeric_limits<cl_float>::infinity(),
+            std::numeric_limits<cl_float>::quiet_NaN()
+        };
+    }
+    bool use_ulp() // results are judged by ULP error
+    {
+        return true;
+    }
+    float ulp() // allowed ULP error: 0 for both profiles (the embedded branch is separate but currently identical)
+    {
+        if(m_is_embedded)
+        {
+            return 0.0f;
+        }
+        return 0.0f;
+    }
+    bool m_is_embedded; // value passed to the constructor
+// gentype remquo(gentype x, gentype y, intn* quo);
+struct fp_func_remquo : public binary_func<cl_float, cl_float, cl_float2> // Test descriptor for remquo(x, y, quo): two cl_float inputs, cl_float2 output holding both results
+    fp_func_remquo(bool is_embedded) : m_is_embedded(is_embedded) // is_embedded: whether the device reported EMBEDDED_PROFILE
+    {
+    }
+    std::string str() // name of the function under test
+    {
+        return "remquo";
+    }
+    std::string headers() // #include line(s) the kernel source needs for remquo
+    {
+        return "#include <opencl_math>\n";
+    }
+    cl_double2 operator()(const cl_float& x, const cl_float& y) // host reference: reference::remquo on both arguments widened to double
+    {
+        return reference::remquo(static_cast<cl_double>(x), static_cast<cl_double>(y));
+    }
+    cl_float min1() // presumably the lower bound used when generating x - confirm in common.hpp
+    {
+        return -1000.0f;
+    }
+    cl_float max1() // upper bound counterpart of min1()
+    {
+        return 1000.0f;
+    }
+    cl_float min2() // lower bound for y
+    {
+        return -1000.0f;
+    }
+    cl_float max2() // upper bound for y
+    {
+        return 1000.0f;
+    }
+    std::vector<cl_float> in1_special_cases() // explicit x values: signed zeros, +/-1, +/-infinity and a quiet NaN
+    {
+        return {
+            cl_float(0.0f),
+            cl_float(-0.0f),
+            cl_float(1.0f),
+            cl_float(-1.0f),
+            std::numeric_limits<cl_float>::infinity(),
+            -std::numeric_limits<cl_float>::infinity(),
+            std::numeric_limits<cl_float>::quiet_NaN()
+        };
+    }
+    std::vector<cl_float> in2_special_cases() // explicit y values: the same list as for x
+    {
+        return {
+            cl_float(0.0f),
+            cl_float(-0.0f),
+            cl_float(1.0f),
+            cl_float(-1.0f),
+            std::numeric_limits<cl_float>::infinity(),
+            -std::numeric_limits<cl_float>::infinity(),
+            std::numeric_limits<cl_float>::quiet_NaN()
+        };
+    }
+    bool use_ulp() // results are judged by ULP error
+    {
+        return true;
+    }
+    float ulp() // allowed ULP error: 0 for both profiles (the embedded branch is separate but currently identical)
+    {
+        if(m_is_embedded)
+        {
+            return 0.0f;
+        }
+        return 0.0f;
+    }
+    bool m_is_embedded; // value passed to the constructor
+// We need to specialize generate_kernel_binary<>() function template for fp_func_remquo.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) // development builds that use OpenCL C kernels
+template <>
+std::string generate_kernel_binary<fp_func_remquo, cl_float, cl_float, cl_float2>(fp_func_remquo func) // OpenCL C source of the remquo test kernel
+    return
+        "__kernel void test_remquo(global float *input1, global float *input2, global float2 *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    int quo = 0;\n"
+        "    int sign = 0;\n"
+        "    result.x = remquo(input1[gid], input2[gid], &quo);\n" // .x: remainder returned by remquo()
+        // The specification says:
+        // "remquo also calculates the lower seven bits of the integral quotient x/y,
+        // and gives that value the same sign as x/y. It stores this signed value in
+        // the object pointed to by quo."
+        // An implementation may store more than seven bits in quo. We need to take
+        // care of that here.
+        "    sign = (quo < 0) ? -1 : 1;\n" // remember the sign before masking
+        "    quo = (quo < 0) ? -quo : quo;\n" // work on the magnitude
+        "    quo &= 0x0000007f;\n" // keep only the low seven bits
+        "    result.y = (sign < 0) ? -quo : quo;\n" // .y: masked quotient with its sign restored
+        "    output[gid] = result;\n"
+        "}\n";
+template <>
+std::string generate_kernel_binary<fp_func_remquo, cl_float, cl_float, cl_float2>(fp_func_remquo func) // OpenCL C++ source: func.defs() and func.headers() are prepended, buffers are global_ptr<>
+    return
+        "" + func.defs() +
+        "" + func.headers() +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_remquo(global_ptr<float[]> input1, global_ptr<float[]> input2, global_ptr<float2[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 result;\n"
+        "    int quo = 0;\n"
+        "    int sign = 0;\n"
+        "    result.x = remquo(input1[gid], input2[gid], &quo);\n" // .x: remainder returned by remquo()
+        // The specification says:
+        // "remquo also calculates the lower seven bits of the integral quotient x/y,
+        // and gives that value the same sign as x/y. It stores this signed value in
+        // the object pointed to by quo."
+        // An implementation may store more than seven bits in quo. We need to take
+        // care of that here.
+        "    sign = (quo < 0) ? -1 : 1;\n" // remember the sign before masking
+        "    quo = (quo < 0) ? -quo : quo;\n" // work on the magnitude
+        "    quo &= 0x0000007f;\n" // keep only the low seven bits
+        "    result.y = (sign < 0) ? -quo : quo;\n" // .y: masked quotient with its sign restored
+        "    output[gid] = result;\n"
+        "}\n";
+// -------------- TERNARY FUNCTIONS
+// gentype fma(gentype a, gentype b, gentype c);
+// Arguments: group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2, min3, max3
+MATH_FUNCS_DEFINE_TERNARY_FUNC(fp, fma, std::fma, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f, -1000.0f, 1000.0f) // 0 ulp on both profiles; bounds -1000..1000 for all three arguments
+// floating point functions: runs every fp_func_* test defined in this file on the given device
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS; // never assigned directly here; presumably updated inside the TEST_*_FUNC_MACRO invocations - TODO confirm
+    int last_error = CL_SUCCESS; // status of the clGetDeviceInfo call below
+    // Check for EMBEDDED_PROFILE
+    bool is_embedded_profile = false;
+    char profile[128]; // receives the CL_DEVICE_PROFILE string
+    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
+    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
+    if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
+        is_embedded_profile = true;
+    // gentype ceil(gentype x);
+    TEST_UNARY_FUNC_MACRO((fp_func_ceil(is_embedded_profile)))
+    // gentype floor(gentype x);
+    TEST_UNARY_FUNC_MACRO((fp_func_floor(is_embedded_profile)))
+    // gentype rint(gentype x);
+    TEST_UNARY_FUNC_MACRO((fp_func_rint(is_embedded_profile)))
+    // gentype round(gentype x);
+    TEST_UNARY_FUNC_MACRO((fp_func_round(is_embedded_profile)))
+    // gentype trunc(gentype x);
+    TEST_UNARY_FUNC_MACRO((fp_func_trunc(is_embedded_profile)))
+    // floatn nan(uintn nancode);
+    TEST_UNARY_FUNC_MACRO((fp_func_nan())) // fp_func_nan takes no profile flag
+    // gentype fract(gentype x, gentype* iptr);
+    TEST_UNARY_FUNC_MACRO((fp_func_fract(is_embedded_profile)))
+    // gentype modf(gentype x, gentype* iptr);
+    TEST_UNARY_FUNC_MACRO((fp_func_modf(is_embedded_profile)))
+    // gentype frexp(gentype x, intn* exp);
+    TEST_UNARY_FUNC_MACRO((fp_func_frexp(is_embedded_profile)))
+    // gentype remainder(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((fp_func_remainder(is_embedded_profile)))
+    // gentype copysign(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((fp_func_copysign(is_embedded_profile)))
+    // gentype fmod(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((fp_func_fmod(is_embedded_profile)))
+    // gentype nextafter(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((fp_func_nextafter(is_embedded_profile)))
+    // gentype remquo(gentype x, gentype y, intn* quo);
+    TEST_BINARY_FUNC_MACRO((fp_func_remquo(is_embedded_profile)))
+    // gentype fma(gentype a, gentype b, gentype c);
+    TEST_TERNARY_FUNC_MACRO((fp_func_fma(is_embedded_profile)))
+    if(error != CL_SUCCESS) // any non-success code is reported to the caller as -1
+    {
+        return -1;
+    }
+    return error; // equals CL_SUCCESS when reached
diff --git a/test_conformance/clcpp/math_funcs/half_math_funcs.hpp b/test_conformance/clcpp/math_funcs/half_math_funcs.hpp
new file mode 100644
index 0000000..d72d717
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/half_math_funcs.hpp
@@ -0,0 +1,106 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include <cmath>
+#include "common.hpp"
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)  // development builds that use OpenCL C kernels: functions are spelled half_<name>
+// group_name, func_name, kernel-side spelling, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, cos, half_cos, std::cos, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sin, half_sin, std::sin, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, tan, half_tan, std::tan, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp, half_exp, std::exp, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp2, half_exp2, std::exp2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp10, half_exp10, reference::exp10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log, half_log, std::log, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log2, half_log2, std::log2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log10, half_log10, std::log10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, rsqrt, half_rsqrt, reference::rsqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sqrt, half_sqrt, std::sqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, recip, half_recip, reference::recip, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+// group_name, func_name, kernel-side spelling, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
+MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, divide, half_divide, reference::divide, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, powr, half_powr, reference::powr, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f)
+// Same definitions with the half_math::<name> spelling; arguments: group_name, func_name, kernel-side spelling, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, cos, half_math::cos, std::cos, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sin, half_math::sin, std::sin, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, tan, half_math::tan, std::tan, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp, half_math::exp, std::exp, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp2, half_math::exp2, std::exp2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp10, half_math::exp10, reference::exp10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log, half_math::log, std::log, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log2, half_math::log2, std::log2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log10, half_math::log10, std::log10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, rsqrt, half_math::rsqrt, reference::rsqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sqrt, half_math::sqrt, std::sqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, recip, half_math::recip, reference::recip, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f)
+// group_name, func_name, kernel-side spelling, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
+MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, divide, half_math::divide, reference::divide, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, powr, half_math::powr, reference::powr, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f)
+// half_math functions: runs the half_math cos, sin, tan, exp, exp2, exp10, log, log2, log10 and divide tests on the given device
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS; // never assigned directly here; presumably updated inside the TEST_*_FUNC_MACRO invocations - TODO confirm
+    int last_error = CL_SUCCESS; // status of the clGetDeviceInfo call below
+    // Check for EMBEDDED_PROFILE
+    bool is_embedded_profile = false;
+    char profile[128]; // receives the CL_DEVICE_PROFILE string
+    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
+    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
+    if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
+        is_embedded_profile = true;
+    TEST_UNARY_FUNC_MACRO((half_math_func_cos(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((half_math_func_sin(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((half_math_func_tan(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((half_math_func_exp(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((half_math_func_exp2(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((half_math_func_exp10(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((half_math_func_log(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((half_math_func_log2(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((half_math_func_log10(is_embedded_profile)))
+    TEST_BINARY_FUNC_MACRO((half_math_func_divide(is_embedded_profile))) // NOTE(review): the rsqrt, sqrt, recip and powr descriptors defined above are never run here - confirm this is intentional
+    if(error != CL_SUCCESS) // any non-success code is reported to the caller as -1
+    {
+        return -1;
+    }    
+    return error; // equals CL_SUCCESS when reached
diff --git a/test_conformance/clcpp/math_funcs/logarithmic_funcs.hpp b/test_conformance/clcpp/math_funcs/logarithmic_funcs.hpp
new file mode 100644
index 0000000..cd25d8e
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/logarithmic_funcs.hpp
@@ -0,0 +1,260 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include <cmath>
+#include "common.hpp"
+namespace detail
+{
+
+// Reads the values of the FP_ILOGB0 and FP_ILOGBNAN macros as defined on the device.
+//
+// OpenCL C++ Spec:
+// The value of FP_ILOGB0 shall be either {INT_MIN} or {INT_MAX}. The value of FP_ILOGBNAN
+// shall be either {INT_MAX} or {INT_MIN}.
+//
+// A one-work-item kernel stores both macros into a two-element buffer, which is then read
+// back with a blocking read.
+//
+// device, context, queue - OpenCL objects used to build and run the helper kernel.
+// ilogb_nan  - receives the device value of FP_ILOGBNAN.
+// ilogb_zero - receives the device value of FP_ILOGB0.
+//
+// Returns the status of the last OpenCL call on success; on failure the error macros
+// return early.
+// NOTE(review): the early returns skip the clRelease* calls at the end of the function.
+int get_ilogb_nan_zero(cl_device_id device, cl_context context, cl_command_queue queue, cl_int& ilogb_nan, cl_int& ilogb_zero)
+{
+    cl_mem buffers[1];
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str =
+        "__kernel void get_ilogb_nan_zero(__global int *out)\n"
+        "{\n"
+        "   out[0] = FP_ILOGB0;\n"
+        "   out[1] = FP_ILOGBNAN;\n"
+        "}\n";
+    std::string kernel_name("get_ilogb_nan_zero");
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    // Without this check a failed build would hand an invalid kernel to clSetKernelArg.
+    RETURN_ON_ERROR_MSG(err, "create_opencl_kernel failed");
+
+    std::vector<cl_int> output = generate_output<cl_int>(2);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    // A single work-item writes both values.
+    work_size[0] = 1;
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read (CL_TRUE) straight into the host vector's storage.
+    err = clEnqueueReadBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_int) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    // Save: out[0] holds FP_ILOGB0, out[1] holds FP_ILOGBNAN.
+    ilogb_zero = output[0];
+    ilogb_nan = output[1];
+
+    clReleaseMemObject(buffers[0]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+}
+
+} // namespace detail
+// Host-side reference functor for `intn ilogb(gentype x)`.
+//
+// The results for NaN and zero inputs are device-defined (FP_ILOGBNAN / FP_ILOGB0), so the
+// values to expect are passed to the constructor instead of being taken from the host
+// library.
+struct logarithmic_func_ilogb : public unary_func<cl_float, cl_int>
+{
+    // ilogb_nan / ilogb_zero: expected results for NaN and for +-0 inputs.
+    logarithmic_func_ilogb(cl_int ilogb_nan, cl_int ilogb_zero)
+        : m_ilogb_nan(ilogb_nan), m_ilogb_zero(ilogb_zero)
+    {
+    }
+
+    // Name of the tested built-in.
+    std::string str()
+    {
+        return "ilogb";
+    }
+
+    // Header line emitted into the generated kernel source.
+    std::string headers()
+    {
+        return "#include <opencl_math>\n";
+    }
+
+    // Reference result for x.
+    cl_int operator()(const cl_float& x)
+    {
+        // std::ilogb returns int; the result is handed back as cl_int.
+        static_assert(
+            sizeof(cl_int) == sizeof(int),
+            "Tests assumes that sizeof(cl_int) == sizeof(int)"
+        );
+        if((std::isnan)(x))
+        {
+            return m_ilogb_nan;
+        }
+        // +0.0 and -0.0 compare equal, so one comparison covers both signs.
+        if(x == 0.0)
+        {
+            return m_ilogb_zero;
+        }
+        return (std::ilogb)(x);
+    }
+
+    // Lower bound returned for the first input.
+    cl_float min1()
+    {
+        return -100.0f;
+    }
+
+    // Upper bound returned for the first input.
+    cl_float max1()
+    {
+        return 1000.0f;
+    }
+
+    // Explicit inputs: signed zeros, small exact values, infinities and NaN.
+    std::vector<cl_float> in1_special_cases()
+    {
+        return {
+            cl_float(0.0f),
+            cl_float(-0.0f),
+            cl_float(1.0f),
+            cl_float(-1.0f),
+            cl_float(2.0f),
+            cl_float(-2.0f),
+            std::numeric_limits<cl_float>::infinity(),
+            -std::numeric_limits<cl_float>::infinity(),
+            std::numeric_limits<cl_float>::quiet_NaN()
+        };
+    }
+
+    cl_int m_ilogb_nan;
+    cl_int m_ilogb_zero;
+};
+// Reference functors for the logarithmic built-ins, generated by MATH_FUNCS_DEFINE_UNARY_FUNC.
+// gentype log(gentype x);
+// gentype logb(gentype x);
+// gentype log2(gentype x);
+// gentype log10(gentype x);
+// gentype log1p(gentype x);
+// Macro arguments, in order:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+// NOTE(review): min1 is negative, so inputs outside the real domain of the logarithms are
+// presumably generated on purpose (NaN results) — confirm against the input generator.
+MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log, std::log, true, 3.0f, 4.0f, 0.001f, -10.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, logb, std::logb, true, 0.0f, 0.0f, 0.001f, -10.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log2, std::log2, true, 3.0f, 4.0f, 0.001f, -10.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log10, std::log10, true, 3.0f, 4.0f, 0.001f, -10.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log1p, std::log1p, true, 2.0f, 4.0f, 0.001f, -10.0f, 1000.0f)
+// gentype lgamma(gentype x);
+// OpenCL C++ Spec.:
+// The ULP values for the built-in math functions lgamma and lgamma_r are currently undefined.
+// Because of that we don't check ULP (use_ulp is false) and set the acceptable delta to 0.2f (20%).
+MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, lgamma, std::lgamma, false, 0.0f, 0.0f, 0.2f, -10.0f, 1000.0f)
+// gentype lgamma_r(gentype x, intn* signp);
+// OpenCL C++ Spec.:
+// The ULP values for the built-in math functions lgamma and lgamma_r are currently undefined.
+// Because of that we don't check ULP (use_ulp is false) and set the acceptable delta to 0.2f (20%).
+// Note:
+// We DO NOT test whether the sign of the gamma function returned by lgamma_r is correct;
+// the reference is std::lgamma, exactly as for lgamma above.
+MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, lgamma_r, std::lgamma, false, 0.0f, 0.0f, 0.2f, -10.0f, 1000.0f)
+// We need to specialize generate_kernel_unary<>() function template for logarithmic_func_lgamma_r
+// because it takes two arguments, but only one of it is input, the 2nd one is used to return
+// the sign of the gamma function.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// OpenCL C flavour of the lgamma_r test kernel. The sign written by lgamma_r goes to a
+// private variable and is discarded; only the function value is stored in output.
+// func is unused here.
+template <>
+std::string generate_kernel_unary<logarithmic_func_lgamma_r, cl_float, cl_float>(logarithmic_func_lgamma_r func)
+{
+    return
+        "__kernel void test_lgamma_r(global float *input, global float *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    int sign;\n"
+        "    output[gid] = lgamma_r(input[gid], &sign);\n"
+        "}\n";
+}
+#else
+// OpenCL C++ flavour of the lgamma_r test kernel. The source starts with func.defs() and
+// func.headers(); as above, the sign is written to a private variable and discarded.
+template <>
+std::string generate_kernel_unary<logarithmic_func_lgamma_r, cl_float, cl_float>(logarithmic_func_lgamma_r func)
+{
+    return
+        "" + func.defs() +
+        "" + func.headers() +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_lgamma_r(global_ptr<float[]> input, global_ptr<float[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    int sign;\n"
+        "    output[gid] = lgamma_r(input[gid], &sign);\n"
+        "}\n";
+}
+#endif
+// logarithmic functions
+// Body of the logarithmic test case: queries the device profile, reads the device's
+// FP_ILOGB0 / FP_ILOGBNAN values, then runs the unary checks listed below.
+// NOTE(review): the line naming this test case and the braces around its body are not
+// present in this hunk — confirm against the upstream file.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    // Check for EMBEDDED_PROFILE
+    bool is_embedded_profile = false;
+    char profile[128];
+    error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+    if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
+        is_embedded_profile = true;
+    // Write values of FP_ILOGB0 and FP_ILOGBNAN, which are macros defined on the device, to
+    // ilogb_zero and ilogb_nan.
+    cl_int ilogb_nan = 0;
+    cl_int ilogb_zero = 0;
+    error = detail::get_ilogb_nan_zero(device, context, queue, ilogb_nan, ilogb_zero);
+    RETURN_ON_ERROR_MSG(error, "detail::get_ilogb_nan_zero function failed");
+    // intn ilogb(gentype x);
+    // The reference functor is given the device-defined results for NaN and zero inputs.
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_ilogb(ilogb_nan, ilogb_zero)))
+    // gentype log(gentype x);
+    // gentype logb(gentype x);
+    // gentype log2(gentype x);
+    // gentype log10(gentype x);
+    // gentype log1p(gentype x);
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_log(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_logb(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_log2(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_log10(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_log1p(is_embedded_profile)))
+    // gentype lgamma(gentype x);
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_lgamma(is_embedded_profile)))
+    // gentype lgamma_r(gentype x, intn* signp);
+    //
+    // Note:
+    // We DO NOT test whether the sign of the gamma function returned by lgamma_r is correct
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_lgamma_r(is_embedded_profile)))
+    // Any value other than CL_SUCCESS in error is reported to the caller as -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }
+    return error;
diff --git a/test_conformance/clcpp/math_funcs/main.cpp b/test_conformance/clcpp/math_funcs/main.cpp
new file mode 100644
index 0000000..aada85f
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/main.cpp
@@ -0,0 +1,50 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <limits>
+#include "../common.hpp"
+#include "comparison_funcs.hpp"
+#include "exponential_funcs.hpp"
+#include "floating_point_funcs.hpp"
+#include "half_math_funcs.hpp"
+#include "logarithmic_funcs.hpp"
+#include "other_funcs.hpp"
+#include "power_funcs.hpp"
+#include "trigonometric_funcs.hpp"
+// Entry point of the math_funcs test binary.
+//
+// Refuses to run unless the host float and double types are IEC 559 (IEEE 754) types,
+// then passes every test registered in the global autotest suite to the harness.
+// Returns the harness result, or whatever RETURN_ON_ERROR_MSG returns for -1.
+int main(int argc, const char *argv[])
+{
+    // Check if cl_float (float) and cl_double (double) fulfill the requirements of
+    // IEC 559 (IEEE 754) standard. This is required for the tests to run correctly.
+    if(!std::numeric_limits<cl_float>::is_iec559)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "cl_float (float) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
+            "Tests won't run correctly."
+        );
+    }
+    if(!std::numeric_limits<cl_double>::is_iec559)
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "cl_double (double) does not fulfill the requirements of IEC 559 (IEEE 754) standard. "
+            "Tests won't run correctly."
+        );
+    }
+
+    // The harness takes the number of tests followed by a pointer to the first definition.
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(argc, argv, tests.size(), tests.data(), false, false, 0);
+}
diff --git a/test_conformance/clcpp/math_funcs/other_funcs.hpp b/test_conformance/clcpp/math_funcs/other_funcs.hpp
new file mode 100644
index 0000000..f939a56
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/other_funcs.hpp
@@ -0,0 +1,75 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include <cmath>
+#include "common.hpp"
+// Reference functors for the "other" group (erfc, erf, fabs, tgamma, hypot, mad).
+// Unary macro arguments, in order:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+MATH_FUNCS_DEFINE_UNARY_FUNC(other, erfc, std::erfc, true, 16.0f, 16.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(other, erf, std::erf, true, 16.0f, 16.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(other, fabs, std::fabs, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(other, tgamma, std::tgamma, true, 16.0f, 16.0f, 0.001f, -1000.0f, 1000.0f)
+// Binary macro arguments, in order:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
+MATH_FUNCS_DEFINE_BINARY_FUNC(other, hypot, std::hypot, true, 4.0f, 4.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f)
+// Ternary macro arguments, in order:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2, min3, max3
+// mad passes use_ulp = false, with max_delta = 0.1f.
+MATH_FUNCS_DEFINE_TERNARY_FUNC(other, mad, reference::mad, false, 0.0f, 0.0f, 0.1f, -10.0f, 10.0f, -10.0f, 10.0f, -10.0f, 10.0f)
+// other functions
+// Body of the "other functions" test case: queries the device profile, then runs the
+// unary, binary and ternary checks listed below.
+// NOTE(review): the line naming this test case and the braces around its body are not
+// present in this hunk — confirm against the upstream file.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    // Check for EMBEDDED_PROFILE
+    bool is_embedded_profile = false;
+    char profile[128];
+    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
+    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
+    if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
+        is_embedded_profile = true;
+    // gentype erfc(gentype x);
+    // gentype erf(gentype x);
+    TEST_UNARY_FUNC_MACRO((other_func_erfc(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((other_func_erf(is_embedded_profile)))
+    // gentype fabs(gentype x);
+    TEST_UNARY_FUNC_MACRO((other_func_fabs(is_embedded_profile)))
+    // gentype tgamma(gentype x);
+    TEST_UNARY_FUNC_MACRO((other_func_tgamma(is_embedded_profile)))
+    // gentype hypot(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((other_func_hypot(is_embedded_profile)))
+    // gentype mad(gentype a, gentype b, gentype c);
+    TEST_TERNARY_FUNC_MACRO((other_func_mad(is_embedded_profile)))
+    // Any value other than CL_SUCCESS in error is reported to the caller as -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
diff --git a/test_conformance/clcpp/math_funcs/power_funcs.hpp b/test_conformance/clcpp/math_funcs/power_funcs.hpp
new file mode 100644
index 0000000..2ace9b3
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/power_funcs.hpp
@@ -0,0 +1,153 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <limits>
+#include <type_traits>
+#include <cmath>
+#include "common.hpp"
+// Defines struct power_func_<NAME>: a reference functor for a binary built-in whose first
+// argument is a float and whose second argument is an int.
+//
+//   NAME         - built-in name; also returned by str().
+//   HOST_FUNC    - host reference, called as HOST_FUNC(double(x), y).
+//   USE_ULP      - value returned by use_ulp().
+//   ULP          - value returned by ulp() when the functor is built with is_embedded == false.
+//   ULP_EMBEDDED - value returned by ulp() when the functor is built with is_embedded == true.
+//   MIN1, MAX1   - values returned by min1() / max1() (float argument).
+//   MIN2, MAX2   - values returned by min2() / max2() (int argument).
+#define DEFINE_BINARY_POWER_FUNC_INT(NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, MIN1, MAX1, MIN2, MAX2) \
+struct power_func_ ## NAME : public binary_func<cl_float, cl_int, cl_float> \
+{ \
+    power_func_ ## NAME(bool is_embedded) : m_is_embedded(is_embedded)  \
+    { \
+    \
+    } \
+    \
+    std::string str() \
+    { \
+        return #NAME; \
+    } \
+    \
+    std::string headers()  \
+    { \
+        return "#include <opencl_math>\n"; \
+    } \
+    /* Reference value type is cl_double */ \
+    cl_double operator()(const cl_float& x, const cl_int& y)  \
+    { \
+        return (HOST_FUNC)(static_cast<cl_double>(x), y); \
+    } \
+    \
+    cl_float min1() \
+    { \
+        return MIN1; \
+    } \
+    \
+    cl_float max1() \
+    { \
+        return MAX1; \
+    } \
+    \
+    cl_int min2() \
+    { \
+        return MIN2; \
+    } \
+    \
+    cl_int max2() \
+    { \
+        return MAX2; \
+    } \
+    \
+    /* Explicit first-argument inputs: -1 and both signed zeros */ \
+    std::vector<cl_float> in1_special_cases() \
+    { \
+        return {  \
+            cl_float(-1.0f), \
+            cl_float(0.0f), \
+            cl_float(-0.0f), \
+        }; \
+    } \
+    \
+    /* Explicit second-argument inputs: small positive and negative exponents */ \
+    std::vector<cl_int> in2_special_cases() \
+    { \
+        return {  \
+            2, 3, -1, 1, -2, 2 \
+        }; \
+    } \
+    \
+    bool use_ulp() \
+    { \
+        return USE_ULP; \
+    } \
+    \
+    /* ULP bound depends on the profile flag given to the constructor */ \
+    float ulp() \
+    { \
+        if(m_is_embedded) \
+        { \
+            return ULP_EMBEDDED; \
+        } \
+        return ULP; \
+    } \
+private: \
+    bool m_is_embedded; \
+};
+// Reference functors for the power group.
+// Unary macro arguments, in order:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+MATH_FUNCS_DEFINE_UNARY_FUNC(power, cbrt, std::cbrt, true, 2.0f, 4.0f, 0.001f, -1000.0f, -9.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(power, rsqrt, reference::rsqrt, true, 2.0f, 4.0f, 0.001f, 1.0f, 100.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(power, sqrt, std::sqrt, true, 3.0f, 4.0f, 0.001f, 1.0f, 100.0f)
+// Binary macro arguments, in order:
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
+MATH_FUNCS_DEFINE_BINARY_FUNC(power, pow, std::pow, true, 16.0f, 16.0f, 0.001f, 1.0f, 100.0f, 1.0f, 10.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC(power, powr, reference::powr, true, 16.0f, 16.0f, 0.001f, 1.0f, 100.0f, 1.0f, 10.0f)
+// Float/int variants (second argument is an integer); note there is no group_name or
+// max_delta argument here:
+// func_name, reference_func, use_ulp, ulp, ulp_for_embedded, min1, max1, min2, max2
+DEFINE_BINARY_POWER_FUNC_INT(pown, std::pow, true, 16.0f, 16.0f, 1.0f, 100.0f, 1, 10)
+DEFINE_BINARY_POWER_FUNC_INT(rootn, reference::rootn, true, 16.0f, 16.0f, -100.0f, 100.0f, -10, 10)
+// power functions
+// Body of the power test case: queries the device profile, then runs the unary and binary
+// checks listed below.
+// NOTE(review): the line naming this test case and the braces around its body are not
+// present in this hunk — confirm against the upstream file.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    // Check for EMBEDDED_PROFILE
+    bool is_embedded_profile = false;
+    char profile[128];
+    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
+    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
+    if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
+        is_embedded_profile = true;
+    // gentype cbrt(gentype x);
+    // gentype sqrt(gentype x);
+    // gentype rsqrt(gentype x);
+    TEST_UNARY_FUNC_MACRO((power_func_cbrt(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((power_func_sqrt(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((power_func_rsqrt(is_embedded_profile)))
+    // gentype pow(gentype x, gentype y);
+    // gentype powr(gentype x, gentype y);
+    TEST_BINARY_FUNC_MACRO((power_func_pow(is_embedded_profile)))
+    TEST_BINARY_FUNC_MACRO((power_func_powr(is_embedded_profile)))
+    // gentype pown(gentype x, intn y);
+    // gentype rootn(gentype x, intn y);
+    TEST_BINARY_FUNC_MACRO((power_func_pown(is_embedded_profile)))
+    TEST_BINARY_FUNC_MACRO((power_func_rootn(is_embedded_profile)))
+    // Any value other than CL_SUCCESS in error is reported to the caller as -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
diff --git a/test_conformance/clcpp/math_funcs/reference.hpp b/test_conformance/clcpp/math_funcs/reference.hpp
new file mode 100644
index 0000000..0f5fc2f
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/reference.hpp
@@ -0,0 +1,315 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include <cmath>
+#include <limits>
+#include "../common.hpp"
+namespace reference
+    // Reference functions for OpenCL comparison functions that
+    // are not already defined in STL.
+    // Returns whichever argument has the larger magnitude; when the magnitudes do not
+    // order either way, defers to std::fmax(x, y).
+    cl_float maxmag(const cl_float& x, const cl_float& y)
+    {
+        const auto mag_x = (std::abs)(x);
+        const auto mag_y = (std::abs)(y);
+        if(mag_x > mag_y)
+            return x;
+        if(mag_y > mag_x)
+            return y;
+        return (std::fmax)(x, y);
+    }
+    // Returns whichever argument has the smaller magnitude; when the magnitudes do not
+    // order either way, defers to std::fmin(x, y).
+    cl_float minmag(const cl_float& x, const cl_float& y)
+    {
+        const auto mag_x = (std::abs)(x);
+        const auto mag_y = (std::abs)(y);
+        if(mag_x < mag_y)
+            return x;
+        if(mag_y < mag_x)
+            return y;
+        return (std::fmin)(x, y);
+    }
+    // Reference functions for OpenCL exp functions that
+    // are not already defined in STL.
+    // Reference for exp10, using the identity 10^x = 2^(x * log2(10)).
+    // log2(10) and the product are evaluated in long double; the product is narrowed to
+    // cl_double before being passed to std::exp2.
+    cl_double exp10(const cl_double& x)
+    {
+        const long double log2_of_10 = (std::log2)(static_cast<long double>(10.0));
+        const cl_double exponent = static_cast<cl_double>(x * log2_of_10);
+        return (std::exp2)(exponent);
+    }
+    // Reference functions for OpenCL floating point functions that
+    // are not already defined in STL.
+    // Reference for fract (copied from math_brute_force/reference_math.c).
+    // Result layout: s[0] is the fractional part, s[1] is the integral part.
+    // NaN input yields NaN in both components. A negative fractional part from std::modf
+    // is shifted up by one (and the integral part down by one); if that sum rounds to
+    // exactly 1 it is replaced by HEX_FLT(+, 1, fffffe, -, 1).
+    cl_double2 fract(cl_double x)
+    {
+        cl_double2 result;
+        cl_double& frac_part = result.s[0];
+        cl_double& int_part = result.s[1];
+
+        if((std::isnan)(x))
+        {
+            frac_part = std::numeric_limits<cl_double>::quiet_NaN();
+            int_part = std::numeric_limits<cl_double>::quiet_NaN();
+            return result;
+        }
+
+        frac_part = (std::modf)(x, &int_part);
+        if(frac_part < 0.0)
+        {
+            frac_part = 1.0f + frac_part;
+            int_part -= 1.0f;
+            if(frac_part == 1.0f)
+            {
+                frac_part = HEX_FLT(+, 1, fffffe, -, 1);
+            }
+        }
+        return result;
+    }
+    // Reference for remquo.
+    // Result layout: s[0] is the remainder (same value as std::remainder(x, y)); s[1] is
+    // the low seven bits of the magnitude of the integral quotient, carrying the sign of
+    // the quotient.
+    // The quotient part is 0 whenever the quotient is not a finite number (y == 0, a NaN
+    // argument, infinite x).
+    cl_double2 remquo(cl_double x, cl_double y)
+    {
+        cl_double2 r;
+        // remquo returns the same value that is returned by the
+        // remainder function
+        r.s[0] = (std::remainder)(x, y);
+        r.s[1] = 0;
+
+        // calculate quo
+        const cl_double x_y = (x - r.s[0]) / y;
+        // Converting a NaN, infinite or out-of-range double to an integer type is
+        // undefined, so the quotient stays in floating point and non-finite values keep
+        // the 0 assigned above.
+        if((std::isfinite)(x_y))
+        {
+            // The exact quotient is an integer; rounding (rather than truncating) keeps a
+            // division that lands a hair below n from being reported as n - 1.
+            const cl_double magnitude = (std::round)((std::abs)(x_y));
+            // fmod by 128 keeps the seven low-order bits.
+            r.s[1] = (std::fmod)(magnitude, 128.0);
+            if(x_y < 0.0)
+                r.s[1] = -r.s[1];
+        }
+        return r;
+    }
+    // Reference functions for OpenCL half_math:: functions that
+    // are not already defined in STL.
+    // Reference for half_math::divide: the double-precision quotient x / y.
+    cl_double divide(cl_double x, cl_double y)
+    {
+        const cl_double quotient = x / y;
+        return quotient;
+    }
+    // Reference for half_math::recip: the double-precision reciprocal 1 / x.
+    cl_double recip(cl_double x)
+    {
+        const cl_double reciprocal = 1.0 / x;
+        return reciprocal;
+    }
+    // Reference functions for OpenCL other functions that
+    // are not already defined in STL.
+    // Reference for mad: x * y + z evaluated in double precision as a single expression.
+    cl_double mad(cl_double x, cl_double y, cl_double z)
+    {
+        return z + (x * y);
+    }
+    // Reference functions for OpenCL power functions that
+    // are not already defined in STL.
+    // Reference for rsqrt: 1 / sqrt(x) in double precision.
+    cl_double rsqrt(const cl_double& x)
+    {
+        const cl_double root = (std::sqrt)(x);
+        return cl_double(1.0) / root;
+    }
+    // Reference for powr(x, y).
+    // The special cases are checked in a fixed order and later branches rely on earlier
+    // ones having returned (e.g. the x == 0 branch assumes y != 0 and y is not NaN).
+    // Every input not caught by a special case is passed to std::pow.
+    cl_double powr(const cl_double& x, const cl_double& y)
+    {
+        //powr(x, y) returns NaN for x < 0.
+        if( x < 0.0 )
+            return std::numeric_limits<cl_double>::quiet_NaN();
+        //powr ( x, NaN ) returns the NaN for x >= 0.
+        //powr ( NaN, y ) returns the NaN.
+        if((std::isnan)(x) || (std::isnan)(y) )
+            return std::numeric_limits<cl_double>::quiet_NaN();
+        if( x == 1.0 )
+        {
+            //powr ( +1, +-inf ) returns NaN.
+            if((std::abs)(y) == INFINITY )
+                return std::numeric_limits<cl_double>::quiet_NaN();
+            //powr ( +1, y ) is 1 for finite y. (NaN handled above)
+            return 1.0;
+        }
+        if( y == 0.0 )
+        {
+            //powr ( +inf, +-0 ) returns NaN.
+            //powr ( +-0, +-0 ) returns NaN.
+            if( x == 0.0 || x == std::numeric_limits<cl_double>::infinity())
+                return std::numeric_limits<cl_double>::quiet_NaN();
+            //powr ( x, +-0 ) is 1 for finite x > 0.  (x <= 0, NaN, INF already handled above)
+            return 1.0;
+        }
+        if( x == 0.0 )
+        {
+            //powr ( +-0, -inf) is +inf.
+            //powr ( +-0, y ) is +inf for finite y < 0.
+            if( y < 0.0 )
+                return std::numeric_limits<cl_double>::infinity();
+            //powr ( +-0, y ) is +0 for y > 0.    (NaN, y==0 handled above)
+            return 0.0;
+        }
+        // x = +inf (negative x already returned NaN above): 0 for y < 0, +inf otherwise.
+        if( (std::isinf)(x) )
+        {
+            if( y < 0 )
+                return 0;
+            return std::numeric_limits<cl_double>::infinity();
+        }
+        double fabsx = (std::abs)(x);
+        double fabsy = (std::abs)(y);
+        //y = +-inf cases: the result depends on whether |x| is below 1
+        //(x == 1 was handled above).
+        if( (std::isinf)(fabsy) )
+        {
+            if( y < 0.0 )
+            {
+                if( fabsx < 1.0 )
+                    return std::numeric_limits<cl_double>::infinity();
+                return 0;
+            }
+            if( fabsx < 1.0 )
+                return 0.0;
+            return std::numeric_limits<cl_double>::infinity();
+        }        
+        return (std::pow)(x, y);
+    }
+    // Reference for rootn(x, n): the n-th root of x.
+    // Special cases handled before the general formula:
+    //   n == 0                -> NaN
+    //   x < 0 and n even      -> NaN
+    //   x == +-0, n > 0       -> +0 for even n, x itself (signed zero) for odd n
+    //   x == +-0, n < 0       -> +inf for even n, infinity with the sign of x for odd n
+    // Otherwise the result is exp2(log2(|x|) / n) with the sign of x.
+    cl_double rootn(const cl_double& x, const cl_int n)
+    {
+        //rootn (x, 0) returns a NaN.
+        if(n == 0)
+            return std::numeric_limits<cl_double>::quiet_NaN();
+
+        // Parity is taken from n directly: negating n first would overflow for the most
+        // negative cl_int and does not change the low bit.
+        const bool n_is_even = (0 == (n & 1));
+
+        //rootn ( x, n )  returns a NaN for x < 0 and n is even.
+        if(x < 0 && n_is_even)
+            return std::numeric_limits<cl_double>::quiet_NaN();
+
+        if(x == 0.0)
+        {
+            if(n > 0)
+            {
+                //rootn ( +-0,  n ) is +0 for even n > 0.
+                //rootn ( +-0,  n ) is +-0 for odd n > 0.
+                return n_is_even ? cl_double(0.0) : x;
+            }
+            //rootn ( +-0,  n ) is +inf for even n < 0.
+            if(n_is_even)
+            {
+                return std::numeric_limits<cl_double>::infinity();
+            }
+            //rootn ( +-0,  n ) is +-inf for odd n < 0.
+            return (std::copysign)(
+                std::numeric_limits<cl_double>::infinity(), x
+            );
+        }
+
+        cl_double r = (std::abs)(x);
+        r = (std::exp2)((std::log2)(r) / static_cast<cl_double>(n));
+        return (std::copysign)(r, x);
+    }
+    // Reference functions for OpenCL trigonometric functions that
+    // are not already defined in STL.
+    // Reference for acospi: acos(x) divided by pi.
+    cl_double acospi(cl_double x)
+    {
+        const cl_double radians = (std::acos)(x);
+        return radians / CL_M_PI;
+    }
+    // Reference for asinpi: asin(x) divided by pi.
+    cl_double asinpi(cl_double x)
+    {
+        const cl_double radians = (std::asin)(x);
+        return radians / CL_M_PI;
+    }
+    // Reference for atanpi: atan(x) divided by pi.
+    cl_double atanpi(cl_double x)
+    {
+        const cl_double radians = (std::atan)(x);
+        return radians / CL_M_PI;
+    }
+    // Reference for cospi: cosine of (x * pi).
+    cl_double cospi(cl_double x)
+    {
+        const cl_double radians = x * CL_M_PI;
+        return (std::cos)(radians);
+    }
+    // Reference for sinpi: sine of (x * pi).
+    cl_double sinpi(cl_double x)
+    {
+        const cl_double radians = x * CL_M_PI;
+        return (std::sin)(radians);
+    }
+    // Reference for tanpi: tangent of (x * pi).
+    cl_double tanpi(cl_double x)
+    {
+        const cl_double radians = x * CL_M_PI;
+        return (std::tan)(radians);
+    }
+    // Reference for atan2; both arguments are forwarded to std::atan2 in the order given.
+    // On Windows builds the case where both arguments are infinite is answered directly:
+    // the magnitude is pi/4 when y > 0 and 3*pi/4 otherwise, and the sign follows x.
+    cl_double atan2(cl_double x, cl_double y)
+    {
+    #if defined(WIN32) || defined(_WIN32)
+        // Fix edge cases for Windows
+        const bool both_infinite = (std::isinf)(x) && (std::isinf)(y);
+        if (both_infinite) {
+            cl_double magnitude = 3.f * CL_M_PI_4;
+            if (y > 0) {
+                magnitude = CL_M_PI_4;
+            }
+            if (x > 0) {
+                return magnitude;
+            }
+            return -magnitude;
+        }
+    #endif // defined(WIN32) || defined(_WIN32)
+        return (std::atan2)(x, y);
+    }
+    // Reference for atan2pi: reference::atan2(x, y) divided by pi.
+    cl_double atan2pi(cl_double x, cl_double y)
+    {
+        const cl_double radians = ::reference::atan2(x, y);
+        return radians / CL_M_PI;
+    }
+    // Reference for sincos. Result layout: s[0] is sin(x), s[1] is cos(x).
+    cl_double2 sincos(cl_double x)
+    {
+        cl_double2 sin_cos;
+        sin_cos.s[1] = (std::cos)(x);
+        sin_cos.s[0] = (std::sin)(x);
+        return sin_cos;
+    }
diff --git a/test_conformance/clcpp/math_funcs/trigonometric_funcs.hpp b/test_conformance/clcpp/math_funcs/trigonometric_funcs.hpp
new file mode 100644
index 0000000..343024a
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/trigonometric_funcs.hpp
@@ -0,0 +1,222 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include <cmath>
+#include "common.hpp"
+// Macro arguments: group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, acos, std::acos, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, acosh, std::acosh, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f) // NOTE(review): std::acosh has a domain error for arguments below 1, yet min1/max1 here are -1 and 1 - confirm this range is intended.
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, acospi, reference::acospi, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, asin, std::asin, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, asinh, std::asinh, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, asinpi, reference::asinpi, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, atan, std::atan, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, atanh, std::atanh, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, atanpi, reference::atanpi, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f)
+// For the (sin/cos/tan)pi functions the input range is [-0.24, 0.24],
+// so the magnitude of (CL_M_PI * x) never exceeds CL_M_PI_F.
+// (The upper bound of these three ranges used to be -0.24f, which made the
+// maximum equal to the minimum instead of spanning the documented interval.)
+// Macro arguments: group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, cos, std::cos, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, cosh, std::cosh, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, cospi, reference::cospi, true, 4.0f, 4.0f, 0.001f, -0.24, 0.24f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, sin, std::sin, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, sinh, std::sinh, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, sinpi, reference::sinpi, true, 4.0f, 4.0f, 0.001f, -0.24, 0.24f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, tan, std::tan, true, 5.0f, 5.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, tanh, std::tanh, true, 5.0f, 5.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F)
+MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, tanpi, reference::tanpi, true, 6.0f, 6.0f, 0.001f, -0.24, 0.24f)
+// Macro arguments: group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
+MATH_FUNCS_DEFINE_BINARY_FUNC(trigonometric, atan2, reference::atan2, true, 6.0f, 6.0f, 0.001f, -1.0f, 1.0f, -1.0f, 1.0f) // min/max are -1 and 1 for both arguments
+MATH_FUNCS_DEFINE_BINARY_FUNC(trigonometric, atan2pi, reference::atan2pi, true, 6.0f, 6.0f, 0.001f, -1.0f, 1.0f, -1.0f, 1.0f) // same ranges as atan2
+// gentype sincos(gentype x, gentype * cosval);
+// The fact that the second argument is a pointer is inconvenient.
+// We don't want to modify all the helper functions defined in funcs_test_utils.hpp
+// that run test kernels generated from this class and check whether the results
+// are correct. So, instead of having two cl_float output buffers (one for sine
+// values and one for cosine values), we use a single cl_float2 output buffer
+// (the first component is the sine, the second is the cosine).
+// Below we also define a specialization of the generate_kernel_unary function
+// template for trigonometric_func_sincos.
+struct trigonometric_func_sincos : public unary_func<cl_float, cl_float2> // Describes the sincos test case: cl_float input, cl_float2 (sine, cosine) output.
+    trigonometric_func_sincos(bool is_embedded) : m_is_embedded(is_embedded) // Stores the embedded-profile flag.
+    {
+    }
+    std::string str() // Name of the tested function.
+    {
+        return "sincos";
+    }
+    std::string headers() // Include line returned for this test case.
+    {
+        return "#include <opencl_math>\n";
+    }
+    /* Reference result computed in cl_double via reference::sincos */
+    cl_double2 operator()(const cl_float& x) 
+    {
+        return (reference::sincos)(static_cast<cl_double>(x));
+    }
+    cl_float min1() // Lower input bound: -pi.
+    {
+        return -CL_M_PI_F;
+    }
+    cl_float max1() // Upper input bound: pi.
+    {
+        return CL_M_PI_F;
+    }
+    bool use_ulp() // Always returns true.
+    {
+        return true;
+    }
+    float ulp() // Returns 4.0f whether or not the embedded flag is set.
+    {
+        if(m_is_embedded)
+        {
+            return 4.0f;
+        }
+        return 4.0f;
+    }
+    bool m_is_embedded; // Set by the constructor; read only by ulp().
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)    
+template <>
+std::string generate_kernel_unary<trigonometric_func_sincos, cl_float, cl_float2>(trigonometric_func_sincos func) // OpenCL C kernel variant for sincos; func is not used here.
+    return // The kernel stores the sine in .x and the cosine in .y of one float2 per work-item.
+        "__kernel void test_sincos(global float *input, global float2 *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 sine_cosine_of_x;\n"
+        "    float cosine_of_x = 0;\n"
+        "    sine_cosine_of_x.x = sincos(input[gid], &(cosine_of_x));\n"
+        "    sine_cosine_of_x.y = cosine_of_x;\n"
+        "    output[gid] = sine_cosine_of_x;\n"
+        "}\n";
+template <>
+std::string generate_kernel_unary<trigonometric_func_sincos, cl_float, cl_float2>(trigonometric_func_sincos func) // Kernel variant using global_ptr arguments; prefixed with func.defs() and func.headers().
+    return         
+        "" + func.defs() + 
+        "" + func.headers() +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_sincos(global_ptr<float[]> input, global_ptr<float2[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    float2 sine_cosine_of_x;\n"
+        "    float cosine_of_x = 0;\n"
+        "    sine_cosine_of_x.x = sincos(input[gid], &(cosine_of_x));\n"
+        "    sine_cosine_of_x.y = cosine_of_x;\n"
+        "    output[gid] = sine_cosine_of_x;\n"
+        "}\n";
+// Test entry point for the trigonometric functions: runs every trigonometric_func_* case, passing each the device-profile flag.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    // Check for EMBEDDED_PROFILE
+    bool is_embedded_profile = false;
+    char profile[128];
+    last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
+    RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo")
+    if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
+        is_embedded_profile = true;
+    // gentype acos(gentype x);
+    // gentype acosh(gentype x);
+    // gentype acospi(gentype x);
+    // gentype asin(gentype x);
+    // gentype asinh(gentype x);
+    // gentype asinpi(gentype x);
+    // gentype atan(gentype x);
+    // gentype atanh(gentype x);
+    // gentype atanpi(gentype x);
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_acos(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_acosh(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_acospi(is_embedded_profile))) 
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_asin(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_asinh(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_asinpi(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_atan(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_atanh(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_atanpi(is_embedded_profile)))
+    // gentype cos(gentype x);
+    // gentype cosh(gentype x);
+    // gentype cospi(gentype x);
+    // gentype sin(gentype x);
+    // gentype sinh(gentype x);
+    // gentype sinpi(gentype x);
+    // gentype tan(gentype x);
+    // gentype tanh(gentype x);
+    // gentype tanpi(gentype x);
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_cos(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_cosh(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_cospi(is_embedded_profile))) 
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_sin(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_sinh(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_sinpi(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_tan(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_tanh(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_tanpi(is_embedded_profile)))
+    // gentype atan2(gentype y, gentype x);
+    // gentype atan2pi(gentype y, gentype x);
+    TEST_BINARY_FUNC_MACRO((trigonometric_func_atan2(is_embedded_profile)))
+    TEST_BINARY_FUNC_MACRO((trigonometric_func_atan2pi(is_embedded_profile)))
+    // gentype sincos(gentype x, gentype * cosval);
+    TEST_UNARY_FUNC_MACRO((trigonometric_func_sincos(is_embedded_profile)))
+    if(error != CL_SUCCESS) // NOTE(review): error is never assigned in the visible code; presumably the TEST_*_FUNC_MACRO invocations update it - confirm.
+    {
+        return -1;
+    }    
+    return error;
diff --git a/test_conformance/clcpp/pipes/CMakeLists.txt b/test_conformance/clcpp/pipes/CMakeLists.txt
new file mode 100644
index 0000000..65daae9
--- /dev/null
+++ b/test_conformance/clcpp/pipes/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/pipes/main.cpp b/test_conformance/clcpp/pipes/main.cpp
new file mode 100644
index 0000000..de6e622
--- /dev/null
+++ b/test_conformance/clcpp/pipes/main.cpp
@@ -0,0 +1,25 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "test_pipes.hpp"
+// Entry point of the pipes test binary: hands every test registered in the
+// global autotest suite (count and array) to the common test harness.
+int main(int argc, const char *argv[])
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    // The fourth argument was missing, leaving a doubled comma (a syntax
+    // error); the harness receives the test array that goes with tests.size().
+    return runTestHarness(argc, argv, tests.size(), tests.data(), false, false, 0);
diff --git a/test_conformance/clcpp/pipes/test_pipes.hpp b/test_conformance/clcpp/pipes/test_pipes.hpp
new file mode 100644
index 0000000..3fc30dc
--- /dev/null
+++ b/test_conformance/clcpp/pipes/test_pipes.hpp
@@ -0,0 +1,632 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <vector>
+#include <algorithm>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+namespace test_pipes {
+enum class pipe_source // Where the generated OpenCL C++ kernels take their pipe from.
+    param, // the pipe passed to the kernel as an argument (pipe_param)
+    storage // a program-scope pipe_storage object emitted into the kernel source
+enum class pipe_operation // Kind of pipe write/read exercised by the generated producer/consumer kernels.
+    work_item, // plain per-work-item write/read without a reservation
+    work_item_reservation, // every even work-item reserves two packets
+    work_group_reservation, // one reservation per work-group
+    sub_group_reservation // one reservation per sub-group
+struct test_options // Parameters of one pipe test run.
+    pipe_operation operation; // selects which producer/consumer kernel pair is generated
+    pipe_source source; // selects how the OpenCL C++ kernels obtain the pipe
+    int max_packets; // pipe capacity given to clCreatePipe and to pipe_storage
+    int num_packets; // number of work-items launched; test() requires it to be even and <= max_packets
+struct output_type // Host-side record holding the results reported for one work-item; the same fields are declared for the kernels in source_common.
+    cl_uint write_reservation_is_valid; // non-zero when the producer's reservation was valid
+    cl_uint write_success; // non-zero when the producer's write succeeded
+    cl_uint num_packets; // packet count reported by the consumer's pipe query
+    cl_uint max_packets; // pipe capacity reported by the consumer's pipe query
+    cl_uint read_reservation_is_valid; // non-zero when the consumer's reservation was valid
+    cl_uint read_success; // non-zero when the consumer's read succeeded
+    cl_uint value; // packet value read by the consumer
+const std::string source_common = R"(
+struct output_type
+    uint write_reservation_is_valid;
+    uint write_success;
+    uint num_packets;
+    uint max_packets;
+    uint read_reservation_is_valid;
+    uint read_success;
+    uint value;
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+std::string generate_source(test_options options) // OpenCL C variant: returns producer/consumer kernel source for options.operation; options.source is not consulted here.
+    std::stringstream s;
+    s << source_common; // shared output_type declaration
+    if (options.operation == pipe_operation::work_item) // one packet per work-item, no reservation
+    {
+        s << R"(
+    kernel void producer(write_only pipe uint out_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+        output[gid].write_reservation_is_valid = 1;
+        uint value = gid;
+        output[gid].write_success = write_pipe(out_pipe, &value) == 0;
+    }
+    kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+        output[gid].num_packets = get_pipe_num_packets(in_pipe);
+        output[gid].max_packets = get_pipe_max_packets(in_pipe);
+        output[gid].read_reservation_is_valid = 1;
+        uint value;
+        output[gid].read_success = read_pipe(in_pipe, &value) == 0;
+        output[gid].value = value;
+    }
+    )";
+    }
+    else if (options.operation == pipe_operation::work_item_reservation) // even work-items reserve two packets; the consumer reads them in swapped order
+    {
+        s << R"(
+    kernel void producer(write_only pipe uint out_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+        if (gid % 2 == 1) return;
+        reserve_id_t reservation = reserve_write_pipe(out_pipe, 2);
+        output[gid + 0].write_reservation_is_valid = is_valid_reserve_id(reservation);
+        output[gid + 1].write_reservation_is_valid = is_valid_reserve_id(reservation);
+        uint value0 = gid + 0;
+        uint value1 = gid + 1;
+        output[gid + 0].write_success = write_pipe(out_pipe, reservation, 0, &value0) == 0;
+        output[gid + 1].write_success = write_pipe(out_pipe, reservation, 1, &value1) == 0;
+        commit_write_pipe(out_pipe, reservation);
+    }
+    kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+        if (gid % 2 == 1) return;
+        output[gid + 0].num_packets = get_pipe_num_packets(in_pipe);
+        output[gid + 0].max_packets = get_pipe_max_packets(in_pipe);
+        output[gid + 1].num_packets = get_pipe_num_packets(in_pipe);
+        output[gid + 1].max_packets = get_pipe_max_packets(in_pipe);
+        reserve_id_t reservation = reserve_read_pipe(in_pipe, 2);
+        output[gid + 0].read_reservation_is_valid = is_valid_reserve_id(reservation);
+        output[gid + 1].read_reservation_is_valid = is_valid_reserve_id(reservation);
+        uint value0;
+        uint value1;
+        output[gid + 0].read_success = read_pipe(in_pipe, reservation, 1, &value0) == 0;
+        output[gid + 1].read_success = read_pipe(in_pipe, reservation, 0, &value1) == 0;
+        commit_read_pipe(in_pipe, reservation);
+        output[gid + 0].value = value0;
+        output[gid + 1].value = value1;
+    }
+    )";
+    }
+    else if (options.operation == pipe_operation::work_group_reservation) // one reservation per work-group; the consumer reads in reversed local-id order
+    {
+        s << R"(
+    kernel void producer(write_only pipe uint out_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+        reserve_id_t reservation = work_group_reserve_write_pipe(out_pipe, get_local_size(0));
+        output[gid].write_reservation_is_valid = is_valid_reserve_id(reservation);
+        uint value = gid;
+        output[gid].write_success = write_pipe(out_pipe, reservation, get_local_id(0), &value) == 0;
+        work_group_commit_write_pipe(out_pipe, reservation);
+    }
+    kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+        output[gid].num_packets = get_pipe_num_packets(in_pipe);
+        output[gid].max_packets = get_pipe_max_packets(in_pipe);
+        reserve_id_t reservation = work_group_reserve_read_pipe(in_pipe, get_local_size(0));
+        output[gid].read_reservation_is_valid = is_valid_reserve_id(reservation);
+        uint value;
+        output[gid].read_success = read_pipe(in_pipe, reservation, get_local_size(0) - 1 - get_local_id(0), &value) == 0;
+        work_group_commit_read_pipe(in_pipe, reservation);
+        output[gid].value = value;
+    }
+    )";
+    }
+    else if (options.operation == pipe_operation::sub_group_reservation) // one reservation per sub-group; the kernel source enables cl_khr_subgroups
+    {
+        s << R"(
+    #pragma OPENCL EXTENSION cl_khr_subgroups : enable
+    kernel void producer(write_only pipe uint out_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+        reserve_id_t reservation = sub_group_reserve_write_pipe(out_pipe, get_sub_group_size());
+        output[gid].write_reservation_is_valid = is_valid_reserve_id(reservation);
+        uint value = gid;
+        output[gid].write_success = write_pipe(out_pipe, reservation, get_sub_group_local_id(), &value) == 0;
+        sub_group_commit_write_pipe(out_pipe, reservation);
+    }
+    kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output)
+    {
+        const ulong gid = get_global_id(0);
+        output[gid].num_packets = get_pipe_num_packets(in_pipe);
+        output[gid].max_packets = get_pipe_max_packets(in_pipe);
+        reserve_id_t reservation = sub_group_reserve_read_pipe(in_pipe, get_sub_group_size());
+        output[gid].read_reservation_is_valid = is_valid_reserve_id(reservation);
+        uint value;
+        output[gid].read_success = read_pipe(in_pipe, reservation, get_sub_group_size() - 1 - get_sub_group_local_id(), &value) == 0;
+        sub_group_commit_read_pipe(in_pipe, reservation);
+        output[gid].value = value;
+    }
+    )";
+    }
+    return s.str(); // only source_common is returned when no branch matched
+// OpenCL C++ variant: builds the producer/consumer kernel source for one test.
+//   options.source    - whether the kernels use the pipe argument (pipe_param)
+//                       or a program-scope pipe_storage sized by options.max_packets
+//   options.operation - which write/read flavour the kernels exercise
+// Returns the complete program source. If no operation branch matches, only
+// the includes, source_common and the optional pipe_storage line are returned.
+// The consumer-side read calls mirror the producer-side write calls; the
+// reservation variants read the packets in the opposite order to the one in
+// which they were written, matching the OpenCL C variant of these kernels.
+std::string generate_source(test_options options)
+    std::stringstream s;
+    s << R"(
+    #include <opencl_memory>
+    #include <opencl_common>
+    #include <opencl_work_item>
+    #include <opencl_synchronization>
+    #include <opencl_pipe>
+    using namespace cl;
+    )";
+    s << source_common;
+    // Statements spliced at the top of each kernel so that the rest of the
+    // kernel body can use out_pipe / in_pipe regardless of the pipe source.
+    std::string init_out_pipe;
+    std::string init_in_pipe;
+    if (options.source == pipe_source::param)
+    {
+        init_out_pipe = "auto out_pipe = pipe_param;";
+        init_in_pipe = "auto in_pipe = pipe_param;";
+    }
+    else if (options.source == pipe_source::storage)
+    {
+        s << "pipe_storage<uint, " << std::to_string(options.max_packets) << "> storage;";
+        init_out_pipe = "auto out_pipe = storage.get<pipe_access::write>();";
+        init_in_pipe = "auto in_pipe = make_pipe(storage);";
+    }
+    if (options.operation == pipe_operation::work_item)
+    {
+        s << R"(
+    kernel void producer(pipe<uint, pipe_access::write> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_out_pipe << R"(
+        const ulong gid = get_global_id(0);
+        output[gid].write_reservation_is_valid = 1;
+        uint value = gid;
+        output[gid].write_success = out_pipe.write(value);
+    }
+    kernel void consumer(pipe<uint, pipe_access::read> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_in_pipe << R"(
+        const ulong gid = get_global_id(0);
+        output[gid].num_packets = in_pipe.num_packets();
+        output[gid].max_packets = in_pipe.max_packets();
+        output[gid].read_reservation_is_valid = 1;
+        uint value;
+        output[gid].read_success = in_pipe.read(value);
+        output[gid].value = value;
+    }
+    )";
+    }
+    else if (options.operation == pipe_operation::work_item_reservation)
+    {
+        s << R"(
+    kernel void producer(pipe<uint, pipe_access::write> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_out_pipe << R"(
+        const ulong gid = get_global_id(0);
+        if (gid % 2 == 1) return;
+        auto reservation = out_pipe.reserve(2);
+        output[gid + 0].write_reservation_is_valid = reservation.is_valid();
+        output[gid + 1].write_reservation_is_valid = reservation.is_valid();
+        uint value0 = gid + 0;
+        uint value1 = gid + 1;
+        output[gid + 0].write_success = reservation.write(0, value0);
+        output[gid + 1].write_success = reservation.write(1, value1);
+        reservation.commit();
+    }
+    kernel void consumer(pipe<uint, pipe_access::read> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_in_pipe << R"(
+        const ulong gid = get_global_id(0);
+        if (gid % 2 == 1) return;
+        output[gid + 0].num_packets = in_pipe.num_packets();
+        output[gid + 0].max_packets = in_pipe.max_packets();
+        output[gid + 1].num_packets = in_pipe.num_packets();
+        output[gid + 1].max_packets = in_pipe.max_packets();
+        auto reservation = in_pipe.reserve(2);
+        output[gid + 0].read_reservation_is_valid = reservation.is_valid();
+        output[gid + 1].read_reservation_is_valid = reservation.is_valid();
+        uint value0;
+        uint value1;
+        output[gid + 0].read_success = reservation.read(1, value0);
+        output[gid + 1].read_success = reservation.read(0, value1);
+        reservation.commit();
+        output[gid + 0].value = value0;
+        output[gid + 1].value = value1;
+    }
+    )";
+    }
+    else if (options.operation == pipe_operation::work_group_reservation)
+    {
+        s << R"(
+    kernel void producer(pipe<uint, pipe_access::write> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_out_pipe << R"(
+        const ulong gid = get_global_id(0);
+        auto reservation = out_pipe.work_group_reserve(get_local_size(0));
+        output[gid].write_reservation_is_valid = reservation.is_valid();
+        uint value = gid;
+        output[gid].write_success = reservation.write(get_local_id(0), value);
+        reservation.commit();
+    }
+    kernel void consumer(pipe<uint, pipe_access::read> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_in_pipe << R"(
+        const ulong gid = get_global_id(0);
+        output[gid].num_packets = in_pipe.num_packets();
+        output[gid].max_packets = in_pipe.max_packets();
+        auto reservation = in_pipe.work_group_reserve(get_local_size(0));
+        output[gid].read_reservation_is_valid = reservation.is_valid();
+        uint value;
+        output[gid].read_success = reservation.read(get_local_size(0) - 1 - get_local_id(0), value);
+        reservation.commit();
+        output[gid].value = value;
+    }
+    )";
+    }
+    else if (options.operation == pipe_operation::sub_group_reservation)
+    {
+        s << R"(
+    kernel void producer(pipe<uint, pipe_access::write> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_out_pipe << R"(
+        const ulong gid = get_global_id(0);
+        auto reservation = out_pipe.sub_group_reserve(get_sub_group_size());
+        output[gid].write_reservation_is_valid = reservation.is_valid();
+        uint value = gid;
+        output[gid].write_success = reservation.write(get_sub_group_local_id(), value);
+        reservation.commit();
+    }
+    kernel void consumer(pipe<uint, pipe_access::read> pipe_param, global_ptr<output_type[]> output)
+    {
+        )" << init_in_pipe << R"(
+        const ulong gid = get_global_id(0);
+        output[gid].num_packets = in_pipe.num_packets();
+        output[gid].max_packets = in_pipe.max_packets();
+        auto reservation = in_pipe.sub_group_reserve(get_sub_group_size());
+        output[gid].read_reservation_is_valid = reservation.is_valid();
+        uint value;
+        output[gid].read_success = reservation.read(get_sub_group_size() - 1 - get_sub_group_local_id(), value);
+        reservation.commit();
+        output[gid].value = value;
+    }
+    )";
+    }
+    return s.str();
+// Runs one producer/consumer pipe test described by `options`.
+//   device, context, queue - OpenCL objects used to build and run the kernels
+//   options                - operation kind, pipe source, pipe capacity and
+//                            number of packets (= number of work-items)
+// Returns CL_SUCCESS when every work-item reports a valid reservation, a
+// successful write and read, plausible packet counts, and when the values read
+// back form a permutation of 0..num_packets-1. Invalid options and failed
+// checks are reported through RETURN_ON_ERROR_MSG with -1; OpenCL API failures
+// go through RETURN_ON_CL_ERROR / RETURN_ON_ERROR.
+int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
+    int error = CL_SUCCESS;
+    // The reservation kernels handle packets in pairs, and the pipe must be
+    // able to hold everything the producer writes.
+    if (options.num_packets % 2 != 0 || options.max_packets < options.num_packets)
+    {
+        RETURN_ON_ERROR_MSG(-1, "Invalid test options")
+    }
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    if (options.operation == pipe_operation::sub_group_reservation && !is_extension_available(device, "cl_khr_subgroups"))
+    {
+        log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. Skipping tests.\n");
+        return CL_SUCCESS;
+    }
+    cl_program program;
+    cl_kernel producer_kernel;
+    cl_kernel consumer_kernel;
+    std::string producer_kernel_name = "producer";
+    std::string consumer_kernel_name = "consumer";
+    std::string source = generate_source(options);
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(
+        context, &program, &producer_kernel,
+        source, producer_kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &producer_kernel,
+        source, producer_kernel_name, "-cl-std=CL2.0", false
+    );
+    RETURN_ON_ERROR(error)
+    consumer_kernel = clCreateKernel(program, consumer_kernel_name.c_str(), &error);
+    RETURN_ON_CL_ERROR(error, "clCreateKernel")
+// Normal run
+    error = create_opencl_kernel(
+        context, &program, &producer_kernel,
+        source, producer_kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    consumer_kernel = clCreateKernel(program, consumer_kernel_name.c_str(), &error);
+    RETURN_ON_CL_ERROR(error, "clCreateKernel")
+    size_t max_work_group_size;
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group_size, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+    // One work-item per packet.
+    const size_t count = options.num_packets;
+    const size_t local_size = (std::min)((size_t)256, max_work_group_size);
+    const size_t global_size = count;
+    const cl_uint packet_size = sizeof(cl_uint);
+    cl_mem pipe = clCreatePipe(context, 0, packet_size, options.max_packets, NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreatePipe")
+    cl_mem output_buffer;
+    output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * count, NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    // Zero the result records so that a field no kernel wrote reads as failure.
+    const char pattern = 0;
+    error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer")
+    error = clSetKernelArg(producer_kernel, 0, sizeof(cl_mem), &pipe);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(producer_kernel, 1, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clEnqueueNDRangeKernel(queue, producer_kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+    error = clSetKernelArg(consumer_kernel, 0, sizeof(cl_mem), &pipe);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(consumer_kernel, 1, sizeof(output_buffer), &output_buffer);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clEnqueueNDRangeKernel(queue, consumer_kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+    // Blocking read of all result records into host memory. The destination
+    // pointer is the vector's storage (the argument was previously missing).
+    std::vector<output_type> output(count);
+    error = clEnqueueReadBuffer(
+        queue, output_buffer, CL_TRUE,
+        0, sizeof(output_type) * count,
+        static_cast<void *>(output.data()),
+        0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+    // existing_values[v] becomes true once some work-item has reported value v.
+    std::vector<bool> existing_values(count, false);
+    for (size_t gid = 0; gid < count; gid++)
+    {
+        const output_type &o = output[gid];
+        if (!o.write_reservation_is_valid)
+        {
+            RETURN_ON_ERROR_MSG(-1, "write reservation is not valid")
+        }
+        if (!o.write_success)
+        {
+            RETURN_ON_ERROR_MSG(-1, "write did not succeed")
+        }
+        if (o.num_packets == 0 || o.num_packets > options.num_packets)
+        {
+            RETURN_ON_ERROR_MSG(-1, "num_packets did not return correct value")
+        }
+        if (o.max_packets != options.max_packets)
+        {
+            RETURN_ON_ERROR_MSG(-1, "max_packets did not return correct value")
+        }
+        if (!o.read_reservation_is_valid)
+        {
+            RETURN_ON_ERROR_MSG(-1, "read reservation is not valid")
+        }
+        if (!o.read_success)
+        {
+            RETURN_ON_ERROR_MSG(-1, "read did not succeed")
+        }
+        // Every value must be present exactly once, in any order
+        if (o.value >= count || existing_values[o.value])
+        {
+            RETURN_ON_ERROR_MSG(-1, "kernel did not return correct value")
+        }
+        existing_values[o.value] = true;
+    }
+    clReleaseMemObject(pipe);
+    clReleaseMemObject(output_buffer);
+    clReleaseKernel(producer_kernel);
+    clReleaseKernel(consumer_kernel);
+    clReleaseProgram(program);
+    return error;
+const pipe_operation pipe_operations[] = { // All operation kinds; the test entry points iterate over this list.
+    pipe_operation::work_item,
+    pipe_operation::work_item_reservation,
+    pipe_operation::work_group_reservation,
+    pipe_operation::sub_group_reservation
+const std::tuple<int, int> max_and_num_packets[] = { // Preset (max_packets, num_packets) pairs; callers keep only those with max_packets below num_elements.
+    std::make_tuple<int, int>(2, 2),
+    std::make_tuple<int, int>(10, 8),
+    std::make_tuple<int, int>(256, 254),
+    std::make_tuple<int, int>(1 << 16, 1 << 16),
+    std::make_tuple<int, int>((1 << 16) + 5, 1 << 16),
+    std::make_tuple<int, int>(12345, 12344),
+    std::make_tuple<int, int>(1 << 18, 1 << 18)
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // Runs test() for every pipe operation and packet preset with pipe_source::param.
+    std::vector<std::tuple<int, int>> ps;
+    for (auto p : max_and_num_packets)
+    {
+        if (std::get<0>(p) < num_elements) // keep only presets whose max_packets is below num_elements
+            ps.push_back(p);
+    }
+    ps.push_back(std::tuple<int, int>(num_elements, num_elements)); // NOTE(review): test() rejects an odd num_packets, so an odd num_elements would fail here - confirm callers pass an even value.
+    int error = CL_SUCCESS;
+    for (auto operation : pipe_operations)
+    for (auto p : ps)
+    {
+        test_options options;
+        options.source = pipe_source::param;
+        options.max_packets = std::get<0>(p);
+        options.num_packets = std::get<1>(p);
+        options.operation = operation;
+        error = test(device, context, queue, options);
+        RETURN_ON_ERROR(error)
+    }
+    return error;
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // Runs test() for every pipe operation and packet preset with pipe_source::storage.
+    std::vector<std::tuple<int, int>> ps;
+    for (auto p : max_and_num_packets)
+    {
+        if (std::get<0>(p) < num_elements) // keep only presets whose max_packets is below num_elements
+            ps.push_back(p);
+    }
+    ps.push_back(std::tuple<int, int>(num_elements, num_elements)); // NOTE(review): test() rejects an odd num_packets, so an odd num_elements would fail here - confirm callers pass an even value.
+    int error = CL_SUCCESS;
+    for (auto operation : pipe_operations)
+    for (auto p : ps)
+    {
+        test_options options;
+        options.source = pipe_source::storage;
+        options.max_packets = std::get<0>(p);
+        options.num_packets = std::get<1>(p);
+        options.operation = operation;
+        error = test(device, context, queue, options);
+        RETURN_ON_ERROR(error)
+    }
+    return error;
+} // namespace
diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/CMakeLists.txt b/test_conformance/clcpp/program_scope_ctors_dtors/CMakeLists.txt
new file mode 100644
index 0000000..fd36d30
--- /dev/null
+++ b/test_conformance/clcpp/program_scope_ctors_dtors/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/common.hpp b/test_conformance/clcpp/program_scope_ctors_dtors/common.hpp
new file mode 100644
index 0000000..35bf81c
--- /dev/null
+++ b/test_conformance/clcpp/program_scope_ctors_dtors/common.hpp
@@ -0,0 +1,284 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <algorithm>
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+    last_error = run_ps_ctor_dtor_test(  \
+        device, context, queue, count, TEST_CLASS \
+    );  \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+// Base class for all tests for kernels with program scope object with
+// non-trivial ctors and/or dtors
+struct ps_ctors_dtors_test_base : public detail::base_func_type<cl_uint>
+    // ctor is true, if and only if OpenCL program of this test contains program
+    // scope variable with non-trivial ctor.
+    // dtor is true, if and only if OpenCL program of this test contains program
+    // scope variable with non-trivial dtor.
+    ps_ctors_dtors_test_base(const bool ctor,
+                             const bool dtor)
+        : m_ctor(ctor), m_dtor(dtor)
+    {
+    }
+    virtual ~ps_ctors_dtors_test_base() { };
+    // Returns test name
+    virtual std::string str() = 0;
+    // Returns OpenCL program source
+    virtual std::string generate_program() = 0;
+    // Returns the names of the kernels to create, in the order they are listed
+    virtual std::vector<std::string> get_kernel_names()
+    {
+        // Default implementation: exactly one kernel, named by get_kernel_name()
+        std::vector<std::string> names;
+        names.push_back(this->get_kernel_name());
+        return names;
+    }
+    // Returns value that is expected to be in output_buffer[i]
+    virtual cl_uint operator()(size_t i) = 0;
+    // Executes kernels, in the order they appear in `kernels`.
+    // Typical case: execute every kernel once, every kernel has only
+    // one argument, that is, output buffer.
+    //   kernels       - kernels to enqueue
+    //   output_buffer - buffer passed as argument 0 of every kernel
+    //   queue         - command queue the kernels are enqueued on
+    //   work_size     - global work size (1 dimension)
+    // Returns CL_SUCCESS when all calls succeeded, including the case of an
+    // empty `kernels` vector; failing calls are handed to RETURN_ON_CL_ERROR.
+    virtual cl_int execute(const std::vector<cl_kernel>& kernels,
+                           cl_mem& output_buffer,
+                           cl_command_queue& queue,
+                           size_t work_size)
+    {
+        // Initialized so that an empty kernel list does not return an
+        // indeterminate value.
+        cl_int err = CL_SUCCESS;
+        for(auto& k : kernels)
+        {
+            err = clSetKernelArg(k, 0, sizeof(output_buffer), &output_buffer);
+            RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+            err = clEnqueueNDRangeKernel(
+                queue, k, 1,
+                NULL, &work_size, NULL,
+                0, NULL, NULL
+            );
+            RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+        }
+        return err;
+    }
+    // Checks that the clGetProgramInfo() queries for
+    // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT and
+    // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT return values that agree with
+    // m_ctor and m_dtor.
+    // A mismatch (or an unexpected param_value_size_ret) sets the returned
+    // value to -1 and is passed to CHECK_ERROR_MSG; a failing
+    // clGetProgramInfo() call is handed to RETURN_ON_CL_ERROR.
+    virtual cl_int ctors_dtors_present_queries(cl_program program)
+    {
+        cl_int error = CL_SUCCESS;
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return error;
+        #else
+            // `error` carries the test verdict, `query_error` the status of each
+            // clGetProgramInfo() call, so that a successful second query cannot
+            // erase a failure recorded after the first one.
+            cl_int query_error = CL_SUCCESS;
+            size_t cl_bool_size = 0;
+            // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT:
+            // This indicates that the program object contains non-trivial constructor(s) that will be
+            // executed by runtime before any kernel from the program is executed.
+            cl_bool ctors_present = 0;
+            query_error = clGetProgramInfo(
+                program,
+                CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT,
+                sizeof(cl_bool),
+                static_cast<void*>(&ctors_present),
+                &cl_bool_size
+            );
+            RETURN_ON_CL_ERROR(query_error, "clGetProgramInfo")
+            if(cl_bool_size != sizeof(cl_bool))
+            {
+                error = -1;
+                CHECK_ERROR_MSG(
+                    error,
+                    "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).\n",
+                    cl_bool_size,
+                    sizeof(cl_bool)
+                );
+            }
+            // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT:
+            // This indicates that the program object contains non-trivial destructor(s) that will be
+            // executed by runtime when program is destroyed.
+            cl_bool dtors_present = 0;
+            query_error = clGetProgramInfo(
+                program,
+                CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT,
+                sizeof(cl_bool),
+                // The result must land in dtors_present; writing it to
+                // ctors_present would leave dtors_present at 0 and clobber
+                // the ctor answer.
+                static_cast<void*>(&dtors_present),
+                &cl_bool_size
+            );
+            RETURN_ON_CL_ERROR(query_error, "clGetProgramInfo")
+            if(cl_bool_size != sizeof(cl_bool))
+            {
+                error = -1;
+                CHECK_ERROR_MSG(
+                    error,
+                    "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).\n",
+                    cl_bool_size,
+                    sizeof(cl_bool)
+                );
+            }
+            // check constructors
+            if(m_ctor && ctors_present != CL_TRUE)
+            {
+                error = -1;
+                CHECK_ERROR_MSG(
+                    error,
+                    "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 0, should be: 1.\n"
+                );
+            }
+            else if(!m_ctor && ctors_present == CL_TRUE)
+            {
+                error = -1;
+                CHECK_ERROR_MSG(
+                    error,
+                    "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 1, should be: 0.\n"
+                );
+            }
+            // check destructors
+            if(m_dtor && dtors_present != CL_TRUE)
+            {
+                error = -1;
+                CHECK_ERROR_MSG(
+                    error,
+                    "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 0, should be: 1.\n"
+                );
+            }
+            else if(!m_dtor && dtors_present == CL_TRUE)
+            {
+                error = -1;
+                CHECK_ERROR_MSG(
+                    error,
+                    "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 1, should be: 0.\n"
+                );
+            }
+            return error;
+        #endif
+    }
+    bool m_ctor;
+    bool m_dtor;
+// Runs one program scope ctor/dtor test described by `op`:
+// builds the program returned by op.generate_program(), creates one kernel per
+// name from op.get_kernel_names(), calls op.execute() on a buffer of `count`
+// cl_uint elements, checks the ctors/dtors-present queries, releases the
+// kernels and the program, and finally compares the buffer contents with
+// op(i) for every index i.
+// Every step that reports an error stops the test before later steps can
+// overwrite that error.
+template <class ps_ctor_dtor_test>
+int run_ps_ctor_dtor_test(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, ps_ctor_dtor_test op)
+    cl_mem buffers[1];
+    cl_program program;
+    std::vector<cl_kernel> kernels;
+    size_t work_size[1];
+    cl_int err;
+    std::string code_str = op.generate_program();
+    std::vector<std::string> kernel_names = op.get_kernel_names();
+    if(kernel_names.empty())
+    {
+        RETURN_ON_ERROR_MSG(-1, "No kernel to run");
+    }
+    kernels.resize(kernel_names.size());
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]);
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], "-cl-std=CL2.0", false);
+    // `program` is used below, so stop if building it failed
+    RETURN_ON_ERROR(err)
+    for(size_t i = 1; i < kernels.size(); i++)
+    {
+        kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
+        RETURN_ON_CL_ERROR(err, "clCreateKernel");
+    }
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]);
+    // `program` is used below, so stop if building it failed
+    RETURN_ON_ERROR(err)
+    for(size_t i = 1; i < kernels.size(); i++)
+    {
+        kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
+        RETURN_ON_CL_ERROR(err, "clCreateKernel");
+    }
+    work_size[0] = count;
+    // host output vector
+    std::vector<cl_uint> output = generate_output<cl_uint>(work_size[0], 9999);
+    // device output buffer
+    buffers[0] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+    // Execute test
+    err = op.execute(kernels, buffers[0], queue, work_size[0]);
+    // Without this check the result of execute() would be overwritten by the
+    // query below and a failed kernel launch would go unnoticed.
+    RETURN_ON_ERROR(err)
+    // Check if queries returns correct values
+    err = op.ctors_dtors_present_queries(program);
+    RETURN_ON_ERROR(err);
+    // Release kernels and program
+    // Destructors should be called now
+    for(auto& k : kernels)
+    {
+        err = clReleaseKernel(k);
+        RETURN_ON_CL_ERROR(err, "clReleaseKernel");
+    }
+    err = clReleaseProgram(program);
+    RETURN_ON_CL_ERROR(err, "clReleaseProgram");
+    // Finish
+    err = clFinish(queue);
+    RETURN_ON_CL_ERROR(err, "clFinish");
+    err = clEnqueueReadBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    // Check output values
+    for(size_t i = 0; i < output.size(); i++)
+    {
+        cl_uint v = op(i);
+        if(!(are_equal(v, output[i], detail::make_value<cl_uint>(0), op)))
+        {
+            RETURN_ON_ERROR_MSG(-1,
+                "test_%s(%s) failed. Expected: %s, got: %s", op.str().c_str(), type_name<cl_uint>().c_str(),
+                format_value(v).c_str(), format_value(output[i]).c_str()
+            );
+        }
+    }
+    log_info("test_%s(%s) passed\n", op.str().c_str(), type_name<cl_uint>().c_str());
+    clReleaseMemObject(buffers[0]);
+    return err;
diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/main.cpp b/test_conformance/clcpp/program_scope_ctors_dtors/main.cpp
new file mode 100644
index 0000000..08c1908
--- /dev/null
+++ b/test_conformance/clcpp/program_scope_ctors_dtors/main.cpp
@@ -0,0 +1,24 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "test_ctors_dtors.hpp"
+int main(int argc, const char *argv[])
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(argc, argv, tests.size(),, false, false, 0);
diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/test_ctors_dtors.hpp b/test_conformance/clcpp/program_scope_ctors_dtors/test_ctors_dtors.hpp
new file mode 100644
index 0000000..c9ac082
--- /dev/null
+++ b/test_conformance/clcpp/program_scope_ctors_dtors/test_ctors_dtors.hpp
@@ -0,0 +1,324 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "common.hpp"
+// Test for program scope variable with non-trivial ctor
+struct ps_ctor_test : public ps_ctors_dtors_test_base
+    // test_value: value the 1st kernel stores in global_var.m_x and that
+    // operator() expects at even indices of the output buffer.
+    // Base class flags are (ctor = true, dtor = false).
+    ps_ctor_test(const cl_uint test_value)
+        : ps_ctors_dtors_test_base(true, false),
+          m_test_value(test_value)
+    {
+    }
+    std::string str()
+    {
+        return "ps_ctor_test";
+    }
+    // Two kernels: "<test name>_set" first, then "<test name>_read"
+    std::vector<std::string> get_kernel_names()
+    {
+        const std::string prefix = this->str();
+        std::vector<std::string> names;
+        names.push_back(prefix + "_set");
+        names.push_back(prefix + "_read");
+        return names;
+    }
+    // Expected output_buffer[i]: m_test_value at even i, 0xbeefbeef at odd i
+    cl_uint operator()(size_t i)
+    {
+        const bool even_index = (i % 2 == 0);
+        return even_index ? m_test_value : cl_uint(0xbeefbeef);
+    }
+    // In 1st kernel 0th work-item sets member m_x of program scope variable global_var to
+    // m_test_value and m_y to uint(0xbeefbeef),
+    // In 2nd kernel:
+    // 1) if global id is even, then work-item reads global_var.m_x and writes it to output[its-global-id];
+    // 2) otherwise, work-item reads global_var.m_y and writes it to output[its-global-id].
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) 
+            return 
+                "__kernel void " + this->get_kernel_names()[0] + "(global uint *output)\n"
+                "{\n"
+                "   size_t gid = get_global_id(0);\n"
+                "   output[gid] = 0xbeefbeef;\n"
+                "}\n"
+                "__kernel void " + this->get_kernel_names()[1] + "(global uint *output)\n"
+                "{\n"
+                "   size_t gid = get_global_id(0);\n"
+                "   if(gid % 2 == 0)\n"
+                "      output[gid] = " + std::to_string(m_test_value) + ";\n"
+                "}\n";
+        #else
+            return         
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                // struct template
+                "template<class T>\n"    
+                "struct ctor_test_class_base {\n"
+                // non-trivial ctor
+                "   ctor_test_class_base(T x) { m_x = x;};\n"
+                "   T m_x;\n"
+                "};\n"
+                // struct template
+                "template<class T>\n"    
+                "struct ctor_test_class : public ctor_test_class_base<T> {\n"
+                // non-trivial ctor
+                "   ctor_test_class(T x, T y) : ctor_test_class_base<T>(x), m_y(y) { };\n"
+                "   T m_y;\n"
+                "};\n"
+                // global scope program variables
+                "ctor_test_class<uint> global_var(uint(0), uint(0));\n"
+                "__kernel void " + this->get_kernel_names()[0] + "(global_ptr<uint[]> output)\n"
+                "{\n"
+                "   size_t gid = get_global_id(0);\n"
+                "   if(gid == 0) {\n"
+                "       global_var.m_x = " + std::to_string(m_test_value) + ";\n"  
+                "       global_var.m_y = 0xbeefbeef;\n"  
+                "   }\n"
+                "}\n"
+                "__kernel void " + this->get_kernel_names()[1] + "(global_ptr<uint[]> output)\n"
+                "{\n"
+                "   size_t gid = get_global_id(0);\n"
+                "   if(gid % 2 == 0)\n"
+                "      output[gid] = global_var.m_x;\n"
+                "   else\n"
+                "      output[gid] = global_var.m_y;\n"
+                "}\n";        
+        #endif
+    }
+    cl_uint m_test_value;
+// Test for program scope variable with non-trivial dtor
+struct ps_dtor_test : public ps_ctors_dtors_test_base
+    // test_value: value the dtor of global_var writes to the even elements of
+    // the output buffer, and that operator() expects there.
+    // Base class flags are (ctor = false, dtor = true).
+    ps_dtor_test(const cl_uint test_value)
+        : ps_ctors_dtors_test_base(false, true),
+          m_test_value(test_value)
+    {
+    }
+    std::string str()
+    {
+        return "ps_dtor_test";
+    }
+    // Expected output_buffer[i]: m_test_value at even i, 1 at odd i
+    cl_uint operator()(size_t i)
+    {
+        const bool even_index = (i % 2 == 0);
+        return even_index ? m_test_value : cl_uint(1);
+    }
+    // In 1st kernel 0th work-item saves pointer to output buffer and its size in program scope
+    // variable global_var, it also sets counter to 1;
+    // After global_var is destroyed all even elements of output buffer should equal m_test_value, 
+    // and all odd should equal 1.
+    // If odd elements of output buffer are >1 it means dtor was executed more than once.
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) 
+            return 
+                "__kernel void " + this->get_kernel_name() + "(global uint *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    if(gid % 2 == 0)\n"
+                "        output[gid] = " + std::to_string(m_test_value) + ";\n"
+                "    else\n"
+                "        output[gid] = 1;\n"
+                "}\n";
+        #else
+            return         
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                // struct template
+                "template<class T>\n"
+                "struct dtor_test_class_base {\n"
+                // non-trivial dtor
+                // set all odd elements in buffer to counter
+                "   ~dtor_test_class_base() {\n"
+                "       for(size_t i = 1; i < this->size; i+=2)\n"
+                "       {\n"
+                "           this->buffer[i] = counter;\n"
+                "       }\n"
+                "       counter++;\n"
+                "   };\n"
+                "   global_ptr<uint[]> buffer;\n"
+                "   size_t size;\n"
+                "   T counter;\n"
+                "};\n" 
+                // struct   
+                "struct dtor_test_class : public dtor_test_class_base<uint> {\n"
+                // non-trivial dtor
+                // set all values in buffer to m_test_value
+                "   ~dtor_test_class() {\n"
+                "       for(size_t i = 0; i < this->size; i+=2)\n"
+                "           this->buffer[i] = " + std::to_string(m_test_value) + ";\n"
+                "   };\n"
+                "};\n" 
+                // global scope program variable
+                "dtor_test_class global_var;\n"
+                // When global_var is being destroyed, first dtor ~dtor_test_class is called,
+                // and then ~dtor_test_class_base is called.
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output)\n"
+                "{\n"
+                "   size_t gid = get_global_id(0);\n"
+                // set buffer and size in global var
+                "   if(gid == 0){\n"
+                "       global_var.buffer = output;\n"
+                "       global_var.size = get_global_size(0);\n"
+                "       global_var.counter = 1;\n"
+                "   }\n"
+                "}\n";
+        #endif
+    }
+    cl_uint m_test_value;
+// Test for program scope variable with both non-trivial ctor
+// and non-trivial dtor
+struct ps_ctor_dtor_test : public ps_ctors_dtors_test_base
+    // test_value: value passed to the ctor of global_var and written by its
+    // dtor to every element of the output buffer.
+    // global_var has both a non-trivial ctor and a non-trivial dtor, so both
+    // base class flags (ctor, dtor) must be true; otherwise
+    // ctors_dtors_present_queries() would expect
+    // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT to be 0 for this program.
+    ps_ctor_dtor_test(const cl_uint test_value)
+        : ps_ctors_dtors_test_base(true, true),
+          m_test_value(test_value)
+    {
+    }
+    std::string str()
+    {
+        return "ps_ctor_dtor_test";
+    }
+    // Returns value that is expected to be in output_buffer[i]
+    cl_uint operator()(size_t i)
+    {
+        return m_test_value;
+    }
+    // In 1st kernel 0th work-item saves pointer to output buffer and its size in program scope
+    // variable global_var.
+    // After global_var is destroyed all elements of output buffer should equal m_test_value,
+    // that is, the value stored by the ctor of global_var and written back by its dtor.
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) 
+            return 
+                "__kernel void " + this->get_kernel_name() + "(global uint *output)\n"
+                "{\n"
+                "    size_t gid = get_global_id(0);\n"
+                "    output[gid] = " + std::to_string(m_test_value) + ";\n"
+                "}\n";
+        #else
+            return         
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_array>\n"
+                "using namespace cl;\n"
+                // struct template
+                "template<class T>\n"    
+                "struct ctor_test_class {\n"
+                // non-trivial ctor
+                "   ctor_test_class(T value) : m_value(value) { };\n"
+                "   T m_value;\n"
+                "};\n\n"
+                // struct   
+                "struct ctor_dtor_test_class {\n"
+                // non-trivial ctor
+                "   ctor_dtor_test_class(uint value) : ctor_test(value) { } \n"
+                // non-trivial dtor
+                // set all values in buffer to m_test_value
+                "   ~ctor_dtor_test_class() {\n"
+                "       for(size_t i = 0; i < this->size; i++)\n"
+                "       {\n"
+                "          this->buffer[i] = ctor_test.m_value;\n"            
+                "       }\n"
+                "   };\n"
+                "   ctor_test_class<uint> ctor_test;\n"
+                "   global_ptr<uint[]> buffer;\n"
+                "   size_t size;\n"
+                "};\n" 
+                // global scope program variable
+                "ctor_dtor_test_class global_var(" + std::to_string(m_test_value) + ");\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output)\n"
+                "{\n"
+                "   size_t gid = get_global_id(0);\n"
+                // set buffer and size in global var
+                "   if(gid == 0){\n"
+                "       global_var.buffer = output;\n"
+                "       global_var.size = get_global_size(0);\n"
+                "   }\n"
+                "}\n";
+        #endif
+    }
+    cl_uint m_test_value;
+// This contains tests for program scope (global) constructors and destructors, more
+// detailed tests are also in clcpp/api.
+(cl_device_id device, cl_context context, cl_command_queue queue, int count)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    RUN_PS_CTORS_DTORS_TEST_MACRO(ps_ctor_test(0xdeadbeefU))
+    RUN_PS_CTORS_DTORS_TEST_MACRO(ps_dtor_test(0xbeefdeadU))
+    RUN_PS_CTORS_DTORS_TEST_MACRO(ps_ctor_dtor_test(0xdeaddeadU))
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
diff --git a/test_conformance/clcpp/reinterpret/CMakeLists.txt b/test_conformance/clcpp/reinterpret/CMakeLists.txt
new file mode 100644
index 0000000..ed02c56
--- /dev/null
+++ b/test_conformance/clcpp/reinterpret/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/reinterpret/as_type.hpp b/test_conformance/clcpp/reinterpret/as_type.hpp
new file mode 100644
index 0000000..da088cf
--- /dev/null
+++ b/test_conformance/clcpp/reinterpret/as_type.hpp
@@ -0,0 +1,223 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+#include <cstring>
+template<class IN1, class OUT1>
+struct as_type : public unary_func<IN1, OUT1>
+    static_assert(sizeof(IN1) == sizeof(OUT1), "It is an error to use the as_type<T> operator to reinterpret data to a type of a different number of bytes");
+    std::string str()
+    {
+        return "as_type";
+    }
+    std::string headers()
+    {
+        return "#include <opencl_reinterpret>\n";
+    }
+    // Host reference for as_type: returns an OUT1 whose object representation
+    // equals that of x (the static_assert above guarantees equal sizes).
+    // The bytes are copied with std::memcpy instead of being read through a
+    // reinterpret_cast'ed pointer, which is undefined behavior for unrelated
+    // types (strict aliasing).
+    OUT1 operator()(const IN1& x)
+    {
+        OUT1 result;
+        std::memcpy(&result, &x, sizeof(result));
+        return result;
+    }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+template <class func_type, class in_type, class out_type>
+std::string generate_kernel_as_type(func_type func)
+    std::string in1_value = "input[gid]";
+    std::string function_call = "as_" + type_name<out_type>() + "(" + in1_value + ");";
+    return
+        "__kernel void test_" + func.str() + "(global " + type_name<in_type>() + " *input, global " + type_name<out_type>() + " *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    output[gid] = " + function_call + ";\n"
+        "}\n";
+template <class func_type, class in_type, class out_type>
+std::string generate_kernel_as_type(func_type func)
+    std::string headers = func.headers();
+    std::string in1_value = "input[gid]";
+    std::string function_call = "as_type<" + type_name<out_type>() + ">(" + in1_value + ")";
+    return
+        "" + func.defs() +
+        "" + headers +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_" + func.str() + "(global_ptr<" + type_name<in_type>() +  "[]> input,"
+                                              "global_ptr<" + type_name<out_type>() + "[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    output[gid] = " + function_call + ";\n"
+        "}\n";
+template<class INPUT, class OUTPUT, class as_type_op>
+bool verify_as_type(const std::vector<INPUT> &in, const std::vector<OUTPUT> &out, as_type_op op)
+    // When the operand and result type contain a different number of elements, the result is implementation-defined,
+    // i.e. any result is correct
+    if (vector_size<INPUT>::value == vector_size<OUTPUT>::value)
+    {
+        for (size_t i = 0; i < in.size(); i++)
+        {
+            auto expected = op(in[i]);
+            if (std::memcmp(&expected, &out[i], sizeof(expected)) != 0)
+            {
+                print_error_msg(expected, out[i], i, op);
+                return false;
+            }
+        }
+    }
+    return true;
+template <class as_type_op>
+int test_as_type_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, as_type_op op)
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int error;
+    typedef typename as_type_op::in_type INPUT;
+    typedef typename as_type_op::out_type OUTPUT;
+    // Don't run test for unsupported types
+    if (!(type_supported<INPUT>(device) && type_supported<OUTPUT>(device)))
+    {
+        return CL_SUCCESS;
+    }
+    std::string code_str = generate_kernel_as_type<as_type_op, INPUT, OUTPUT>(op);
+    std::string kernel_name("test_"); kernel_name += op.str();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(error)
+    error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    RETURN_ON_ERROR(error)
+    std::vector<INPUT> input = generate_input<INPUT>(count, op.min1(), op.max1(), op.in_special_cases());
+    std::vector<OUTPUT> output = generate_output<OUTPUT>(count);
+    buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(INPUT) * input.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(OUTPUT) * output.size(), NULL, &error);
+    RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+    error = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(),
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer")
+    error = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    error = clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+    work_size[0] = count;
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+    error = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(),
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+    if (!verify_as_type(input, output, op))
+    {
+        RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+    }
+    log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    last_error = test_as_type_func( \
+        device, context, queue, n_elems, as_type<TYPE1, TYPE2>() \
+    ); \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+    TEST_AS_TYPE_MACRO(cl_int, cl_int)
+    TEST_AS_TYPE_MACRO(cl_uint, cl_int)
+    TEST_AS_TYPE_MACRO(cl_int, cl_ushort2)
+    TEST_AS_TYPE_MACRO(cl_uchar, cl_uchar)
+    TEST_AS_TYPE_MACRO(cl_char4, cl_ushort2)
+    TEST_AS_TYPE_MACRO(cl_uchar16, cl_char16)
+    TEST_AS_TYPE_MACRO(cl_short8, cl_uchar16)
+    TEST_AS_TYPE_MACRO(cl_float4, cl_uint4)
+    TEST_AS_TYPE_MACRO(cl_float16, cl_int16)
+    TEST_AS_TYPE_MACRO(cl_long2, cl_float4)
+    TEST_AS_TYPE_MACRO(cl_ulong, cl_long)
+    TEST_AS_TYPE_MACRO(cl_ulong16, cl_double16)
+    TEST_AS_TYPE_MACRO(cl_uchar16, cl_double2)
+    TEST_AS_TYPE_MACRO(cl_ulong4, cl_short16)
+    if (error != CL_SUCCESS)
+    {
+        return -1;
+    }
+    return error;
diff --git a/test_conformance/clcpp/reinterpret/main.cpp b/test_conformance/clcpp/reinterpret/main.cpp
new file mode 100644
index 0000000..8eddf1d
--- /dev/null
+++ b/test_conformance/clcpp/reinterpret/main.cpp
@@ -0,0 +1,25 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "as_type.hpp"
+int main(int argc, const char *argv[]) // main() of test_conformance/clcpp/reinterpret: runs the tests held by the global test suite
+    auto& tests = autotest::test_suite::global_test_suite().test_defs; // reference to the suite's test definitions (no copy)
+    return runTestHarness(argc, argv, tests.size(),, false, false, 0); // NOTE(review): the argument after tests.size() is empty in this text - it looks lost in extraction; confirm against the real file
diff --git a/test_conformance/clcpp/relational_funcs/CMakeLists.txt b/test_conformance/clcpp/relational_funcs/CMakeLists.txt
new file mode 100644
index 0000000..3a8389c
--- /dev/null
+++ b/test_conformance/clcpp/relational_funcs/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/relational_funcs/common.hpp b/test_conformance/clcpp/relational_funcs/common.hpp
new file mode 100644
index 0000000..a13f7ba
--- /dev/null
+++ b/test_conformance/clcpp/relational_funcs/common.hpp
@@ -0,0 +1,112 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+#include <type_traits>
+#include <cmath>
+template<class IN1, class IN2, class IN3, class OUT1, class F> // Ternary, vector OUT1: applies func to each triple of components. OUT1 is not deducible; visible callers pass the template arguments explicitly.
+OUT1 perform_function(const IN1& in1, const IN2& in2, const IN3& in3, F func, typename std::enable_if<is_vector_type<OUT1>::value>::type* = 0) // overload participates only when is_vector_type<OUT1>::value is true
+    OUT1 result;
+    for(size_t i = 0; i < vector_size<OUT1>::value; i++) // component count is taken from OUT1, not from the inputs
+    {
+        result.s[i] = func(in1.s[i], in2.s[i], in3.s[i]);
+    }
+    return result;
+template<class IN1, class IN2, class IN3, class OUT1, class F> // Ternary, non-vector OUT1: a single call func(in1, in2, in3).
+OUT1 perform_function(const IN1& in1, const IN2& in2, const IN3& in3, F func, typename std::enable_if<!is_vector_type<OUT1>::value>::type* = 0) // overload participates only when is_vector_type<OUT1>::value is false
+    OUT1 result = func(in1, in2, in3);
+    return result;
+template<class IN1, class IN2, class OUT1, class F> // Binary, vector OUT1: applies func to each pair of components.
+OUT1 perform_function(const IN1& in1, const IN2& in2, F func, typename std::enable_if<is_vector_type<OUT1>::value>::type* = 0) // overload participates only when is_vector_type<OUT1>::value is true
+    OUT1 result;
+    for(size_t i = 0; i < vector_size<OUT1>::value; i++) // component count is taken from OUT1, not from the inputs
+    {
+        result.s[i] = func(in1.s[i], in2.s[i]);
+    }
+    return result;
+template<class IN1, class IN2, class OUT1, class F> // Binary, non-vector OUT1: a single call func(in1, in2).
+OUT1 perform_function(const IN1& in1, const IN2& in2, F func, typename std::enable_if<!is_vector_type<OUT1>::value>::type* = 0) // overload participates only when is_vector_type<OUT1>::value is false
+    OUT1 result = func(in1, in2);
+    return result;
+template<class IN1, class OUT1, class F> // Unary, vector OUT1: applies func to each component of in1.
+OUT1 perform_function(const IN1& in1, F func, typename std::enable_if<is_vector_type<OUT1>::value>::type* = 0) // overload participates only when is_vector_type<OUT1>::value is true
+    OUT1 result;
+    for(size_t i = 0; i < vector_size<OUT1>::value; i++) // component count is taken from OUT1, not from the input
+    {
+        result.s[i] = func(in1.s[i]);
+    }
+    return result;
+template<class IN1, class OUT1, class F> // Unary, non-vector OUT1: a single call func(in1).
+OUT1 perform_function(const IN1& in1, F func, typename std::enable_if<!is_vector_type<OUT1>::value>::type* = 0) // overload participates only when is_vector_type<OUT1>::value is false
+    OUT1 result = func(in1);
+    return result;
+template<class IN1> // Host reference for all() on a vector: 1 when every component is non-zero, otherwise 0.
+cl_int perform_all_function(const IN1& in1, typename std::enable_if<is_vector_type<IN1>::value>::type* = 0) // overload participates only when IN1 is a vector type
+    cl_int result = 1; // stays 1 until a zero component is seen
+    for(size_t i = 0; i < vector_size<IN1>::value; i++)
+    {
+        result = (in1.s[i] != 0) ? result : cl_int(0); // a zero component clears the result; later components cannot set it back
+    }
+    return result;
+cl_int perform_all_function(const cl_int& in1, typename std::enable_if<!is_vector_type<cl_int>::value>::type* = 0) // Scalar cl_int overload of the all() reference: 1 when in1 is non-zero, otherwise 0.
+    return (in1 != 0) ? cl_int(1) : cl_int(0);
+template<class IN1> // Host reference for any() on a vector: 1 when at least one component is non-zero, otherwise 0.
+cl_int perform_any_function(const IN1& in1, typename std::enable_if<is_vector_type<IN1>::value>::type* = 0) // overload participates only when IN1 is a vector type
+    cl_int result = 0; // stays 0 until a non-zero component is seen
+    for(size_t i = 0; i < vector_size<IN1>::value; i++)
+    {
+        result = (in1.s[i] != 0) ? cl_int(1) : result; // a non-zero component sets the result; later components cannot clear it
+    }
+    return result;
+cl_int perform_any_function(const cl_int& in1, typename std::enable_if<!is_vector_type<cl_int>::value>::type* = 0) // Scalar cl_int overload of the any() reference: 1 when in1 is non-zero, otherwise 0.
+    return (in1 != 0) ? cl_int(1) : cl_int(0);
diff --git a/test_conformance/clcpp/relational_funcs/comparison_funcs.hpp b/test_conformance/clcpp/relational_funcs/comparison_funcs.hpp
new file mode 100644
index 0000000..980d67c
--- /dev/null
+++ b/test_conformance/clcpp/relational_funcs/comparison_funcs.hpp
@@ -0,0 +1,150 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "common.hpp"
+// This macro creates a class wrapper for the comparison function we want to test.
+template <cl_int N /* Vector size */> /* NOTE(review): macro body - the #define line that names CLASS_NAME, FUNC_NAME and HOST_FUNC_EXPRESSION is not visible in this text */ \
+struct CLASS_NAME : public binary_func< \
+                                    typename make_vector_type<cl_float, N>::type, /* create cl_floatN type */ \
+                                    typename make_vector_type<cl_float, N>::type, /* create cl_floatN type */ \
+                                    typename make_vector_type<cl_int, N>::type /* create cl_intN type */ \
+                                 > \
+{ \
+    typedef typename make_vector_type<cl_float, N>::type input_type; \
+    typedef typename make_vector_type<cl_int, N>::type result_type; \
+    \
+    std::string str() \
+    { \
+        return #FUNC_NAME; /* FUNC_NAME turned into a string literal */ \
+    } \
+    \
+    std::string headers() \
+    { \
+        return "#include <opencl_relational>\n"; \
+    } \
+    \
+    result_type operator()(const input_type& x, const input_type& y) /* host-side reference result for inputs x and y */ \
+    {    \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return perform_function<input_type, input_type, result_type>( \
+            x, y, \
+            [](const SCALAR& a, const SCALAR& b) \
+            { \
+                if(HOST_FUNC_EXPRESSION) /* predicate written in terms of the scalars a and b */ \
+                { \
+                    return cl_int(1); \
+                } \
+                return cl_int(0); \
+            } \
+        ); \
+    } \
+    \
+    bool is_out_bool() \
+    { \
+        return true; \
+    } \
+    \
+    input_type min1() \
+    { \
+        return detail::def_limit<input_type>(-10000.0f); \
+    } \
+    \
+    input_type max1() \
+    { \
+        return detail::def_limit<input_type>(10000.0f); \
+    } \
+    \
+    input_type min2() \
+    { \
+        return detail::def_limit<input_type>(-10000.0f); \
+    } \
+    \
+    input_type max2() \
+    { \
+        return detail::def_limit<input_type>(10000.0f); \
+    } \
+    \
+    std::vector<input_type> in1_special_cases() /* +inf, -inf, quiet NaN, +0.0, -0.0 */ \
+    { \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return {  \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(-std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::quiet_NaN()), \
+            detail::make_value<input_type>(0.0f), \
+            detail::make_value<input_type>(-0.0f) \
+        }; \
+    } \
+    \
+    std::vector<input_type> in2_special_cases() /* same list as in1_special_cases() */ \
+    { \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return {  \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(-std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::quiet_NaN()), \
+            detail::make_value<input_type>(0.0f), \
+            detail::make_value<input_type>(-0.0f) \
+        }; \
+    } \
+DEF_COMPARISION_FUNC(comparison_func_isequal, isequal, (a == b)) // arguments appear to be (CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION) - the #define line is not visible here
+DEF_COMPARISION_FUNC(comparison_func_isnotequal, isnotequal, !(a == b)) // true when either operand is NaN, because a == b is then false
+DEF_COMPARISION_FUNC(comparison_func_isgreater, isgreater, (std::isgreater)(a, b)) // the parentheses around std::isgreater keep a function-like macro of that name from expanding
+DEF_COMPARISION_FUNC(comparison_func_isgreaterequal, isgreaterequal, ((std::isgreater)(a, b) || a == b))
+DEF_COMPARISION_FUNC(comparison_func_isless, isless, (std::isless)(a, b))
+DEF_COMPARISION_FUNC(comparison_func_islessequal, islessequal, ((std::isless)(a, b) || a == b))
+DEF_COMPARISION_FUNC(comparison_func_islessgreater, islessgreater, ((a < b) || (a > b))) // false when either operand is NaN
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // NOTE(review): the test-case name/macro that precedes this parameter list is missing from this text
+    int error = CL_SUCCESS; // presumably updated by the macro invocations below - the macro's definition is not visible here
+    int last_error = CL_SUCCESS;
+// Helper macro, so we don't have to repeat the same code. (The #define of TEST_BINARY_REL_FUNC_MACRO that followed this comment is not visible in this text.)
+    TEST_BINARY_REL_FUNC_MACRO(comparison_func_isequal)
+    TEST_BINARY_REL_FUNC_MACRO(comparison_func_isnotequal)
+    TEST_BINARY_REL_FUNC_MACRO(comparison_func_isgreater)
+    TEST_BINARY_REL_FUNC_MACRO(comparison_func_isgreaterequal)
+    TEST_BINARY_REL_FUNC_MACRO(comparison_func_isless)
+    TEST_BINARY_REL_FUNC_MACRO(comparison_func_islessequal)
+    TEST_BINARY_REL_FUNC_MACRO(comparison_func_islessgreater)
+    if(error != CL_SUCCESS) // any non-success aggregate is reported as a plain -1
+    {
+        return -1;
+    }    
+    return error;
diff --git a/test_conformance/clcpp/relational_funcs/main.cpp b/test_conformance/clcpp/relational_funcs/main.cpp
new file mode 100644
index 0000000..99b0e5a
--- /dev/null
+++ b/test_conformance/clcpp/relational_funcs/main.cpp
@@ -0,0 +1,26 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "comparison_funcs.hpp"
+#include "select_funcs.hpp"
+#include "test_funcs.hpp"
+int main(int argc, const char *argv[]) // main() of test_conformance/clcpp/relational_funcs: runs the tests held by the global test suite
+    auto& tests = autotest::test_suite::global_test_suite().test_defs; // reference to the suite's test definitions (no copy)
+    return runTestHarness(argc, argv, tests.size(),, false, false, 0); // NOTE(review): the argument after tests.size() is empty in this text - it looks lost in extraction; confirm against the real file
diff --git a/test_conformance/clcpp/relational_funcs/select_funcs.hpp b/test_conformance/clcpp/relational_funcs/select_funcs.hpp
new file mode 100644
index 0000000..2e6f6bd
--- /dev/null
+++ b/test_conformance/clcpp/relational_funcs/select_funcs.hpp
@@ -0,0 +1,158 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "common.hpp"
+template <class IN1, cl_int N /* Vector size */> // Host-side reference for select(): per component, yields y where z is non-zero and x otherwise.
+struct select_func_select : public ternary_func<
+                                    typename make_vector_type<IN1, N>::type, /* create IN1N type */
+                                    typename make_vector_type<IN1, N>::type, /* create IN1N type */
+                                    typename make_vector_type<cl_int, N>::type, /* create cl_intN type */
+                                    typename make_vector_type<IN1, N>::type /* create IN1N type */
+                                 >
+    typedef typename make_vector_type<IN1, N>::type input1_type;
+    typedef typename make_vector_type<IN1, N>::type input2_type;
+    typedef typename make_vector_type<cl_int, N>::type input3_type; // the selector is always cl_int-based, whatever IN1 is
+    typedef typename make_vector_type<IN1, N>::type result_type;
+    std::string str() // name of the function under test
+    {
+        return "select";
+    }
+    std::string headers() // include line for the header that provides the function
+    {
+        return "#include <opencl_relational>\n";
+    }
+    result_type operator()(const input1_type& x, const input2_type& y, const input3_type& z) // expected result computed on the host
+    {   
+        typedef typename scalar_type<input1_type>::type SCALAR1;
+        typedef typename scalar_type<input2_type>::type SCALAR2;
+        typedef typename scalar_type<input3_type>::type SCALAR3;
+        return perform_function<input1_type, input2_type, input3_type, result_type>(
+            x, y, z,
+            [](const SCALAR1& a, const SCALAR2& b, const SCALAR3& c)
+            {
+                    return (c != 0) ? b : a; // any non-zero c selects b - the whole value is tested, not just its most significant bit
+            }
+        );
+    }
+    bool is_in3_bool() // marks the third input as boolean-like; how the harness uses this is not visible here
+    {
+        return true;
+    }
+    std::vector<input3_type> in3_special_cases() // selector values 0, 1 and a non-zero value of each sign
+    {
+        return { 
+            detail::make_value<input3_type>(0),
+            detail::make_value<input3_type>(1),
+            detail::make_value<input3_type>(12),
+            detail::make_value<input3_type>(-12)
+        };
+    }
+template <class IN1, cl_int N /* Vector size */> // Host-side reference for bitselect(): each result bit comes from y where the matching bit of z is 1, and from x where it is 0.
+struct select_func_bitselect : public ternary_func<
+                                    typename make_vector_type<IN1, N>::type, /* create IN1N type */
+                                    typename make_vector_type<IN1, N>::type, /* create IN1N type */
+                                    typename make_vector_type<IN1, N>::type, /* create IN1N type (the bit mask has the same type as the data) */
+                                    typename make_vector_type<IN1, N>::type /* create IN1N type */
+                                 >
+    typedef typename make_vector_type<IN1, N>::type input1_type;
+    typedef typename make_vector_type<IN1, N>::type input2_type;
+    typedef typename make_vector_type<IN1, N>::type input3_type;
+    typedef typename make_vector_type<IN1, N>::type result_type;
+    std::string str() // name of the function under test
+    {
+        return "bitselect";
+    }
+    std::string headers() // include line for the header that provides the function
+    {
+        return "#include <opencl_relational>\n";
+    }
+    result_type operator()(const input1_type& x, const input2_type& y, const input3_type& z) // expected result computed on the host
+    {  
+        static_assert(
+            std::is_integral<IN1>::value,
+            "bitselect test is implemented only for integers."
+        ); 
+        static_assert(
+            std::is_unsigned<IN1>::value,
+            "IN1 type should be unsigned, bitwise operations on signed int may cause problems."
+        );
+        typedef typename scalar_type<input1_type>::type SCALAR1;
+        typedef typename scalar_type<input2_type>::type SCALAR2;
+        typedef typename scalar_type<input3_type>::type SCALAR3;
+        return perform_function<input1_type, input2_type, input3_type, result_type>(
+            x, y, z,
+            [](const SCALAR1& a, const SCALAR2& b, const SCALAR3& c)
+            {
+                return (~c & a) | (c & b); // keep a's bits where c is 0, take b's bits where c is 1
+            }
+        );
+    }
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // NOTE(review): the test-case name/macro that precedes this parameter list is missing from this text
+    int error = CL_SUCCESS; // presumably updated by the macro invocations below - the macro's definition is not visible here
+    int last_error = CL_SUCCESS;
+// Tests for select(gentype a, gentype b, booln c) are not run in USE_OPENCLC_KERNELS
+// mode, because this function in OpenCL C requires a different reference function on
+// the host than its OpenCL C++ equivalent.
+// (In OpenCL C the result of select(), when gentype is a vector type, is based on the
+// most significant bits of the components of c.)
+    // gentype select(gentype a, gentype b, booln c)
+    TEST_TERNARY_FUNC_MACRO((select_func_select<cl_uint,  1>())) // the extra parentheses keep the comma in the template argument list from splitting the macro argument
+    TEST_TERNARY_FUNC_MACRO((select_func_select<cl_float, 2>()))
+    TEST_TERNARY_FUNC_MACRO((select_func_select<cl_short, 4>()))
+    TEST_TERNARY_FUNC_MACRO((select_func_select<cl_uint,  8>()))
+    TEST_TERNARY_FUNC_MACRO((select_func_select<cl_uint,  16>()))
+    log_info("WARNING:\n\tTests for select(gentype a, gentype b, booln c) are not run in USE_OPENCLC_KERNELS mode\n"); // NOTE(review): presumably guarded by a preprocessor conditional that is not visible in this text - as shown, the warning is logged even though the select tests above run
+    // gentype bitselect(gentype a, gentype b, gentype c)
+    TEST_TERNARY_FUNC_MACRO((select_func_bitselect<cl_uint, 1>())) // only unsigned integer element types are used, as select_func_bitselect's static_asserts require
+    TEST_TERNARY_FUNC_MACRO((select_func_bitselect<cl_ushort, 2>()))
+    TEST_TERNARY_FUNC_MACRO((select_func_bitselect<cl_uchar, 4>()))
+    TEST_TERNARY_FUNC_MACRO((select_func_bitselect<cl_ushort, 8>()))
+    TEST_TERNARY_FUNC_MACRO((select_func_bitselect<cl_uint, 16>()))
+    if(error != CL_SUCCESS) // any non-success aggregate is reported as a plain -1
+    {
+        return -1;
+    }    
+    return error;
diff --git a/test_conformance/clcpp/relational_funcs/test_funcs.hpp b/test_conformance/clcpp/relational_funcs/test_funcs.hpp
new file mode 100644
index 0000000..77e3d87
--- /dev/null
+++ b/test_conformance/clcpp/relational_funcs/test_funcs.hpp
@@ -0,0 +1,336 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "common.hpp"
+// This macro creates a class wrapper for the unary test function we want to test.
+template <cl_int N /* Vector size */> /* NOTE(review): macro body - the #define line that names CLASS_NAME, FUNC_NAME and HOST_FUNC_EXPRESSION is not visible in this text */ \
+struct CLASS_NAME : public unary_func< \
+                                    typename make_vector_type<cl_float, N>::type, /* create cl_floatN type */ \
+                                    typename make_vector_type<cl_int, N>::type /* create cl_intN type */ \
+                                 > \
+{ \
+    typedef typename make_vector_type<cl_float, N>::type input_type; \
+    typedef typename make_vector_type<cl_int, N>::type result_type; \
+    \
+    std::string str() \
+    { \
+        return #FUNC_NAME; /* FUNC_NAME turned into a string literal */ \
+    } \
+    \
+    std::string headers() \
+    { \
+        return "#include <opencl_relational>\n"; \
+    } \
+    \
+    result_type operator()(const input_type& x) /* host-side reference result for input x */ \
+    {    \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return perform_function<input_type, result_type>( \
+            x, \
+            [](const SCALAR& a) \
+            { \
+                if(HOST_FUNC_EXPRESSION) /* predicate written in terms of the scalar a */ \
+                { \
+                    return cl_int(1); \
+                } \
+                return cl_int(0); \
+            } \
+        ); \
+    } \
+    \
+    bool is_out_bool() \
+    { \
+        return true; \
+    } \
+    \
+    input_type min1() \
+    { \
+        return detail::def_limit<input_type>(-10000.0f); \
+    } \
+    \
+    input_type max1() \
+    { \
+        return detail::def_limit<input_type>(10000.0f); \
+    } \
+    \
+    std::vector<input_type> in1_special_cases() /* +inf, -inf, quiet NaN, signaling NaN, smallest denormal, +0.0, -0.0 */ \
+    { \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return {  \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(-std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::quiet_NaN()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::signaling_NaN()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::denorm_min()), \
+            detail::make_value<input_type>(0.0f), \
+            detail::make_value<input_type>(-0.0f) \
+        }; \
+    } \
+// This macro creates a class wrapper for the binary test function we want to test.
+template <cl_int N /* Vector size */> /* NOTE(review): macro body - the #define line that names CLASS_NAME, FUNC_NAME and HOST_FUNC_EXPRESSION is not visible in this text */ \
+struct CLASS_NAME : public binary_func< \
+                                    typename make_vector_type<cl_float, N>::type, /* create cl_floatN type */ \
+                                    typename make_vector_type<cl_float, N>::type, /* create cl_floatN type */ \
+                                    typename make_vector_type<cl_int, N>::type /* create cl_intN type */ \
+                                 > \
+{ \
+    typedef typename make_vector_type<cl_float, N>::type input_type; \
+    typedef typename make_vector_type<cl_int, N>::type result_type; \
+    \
+    std::string str() \
+    { \
+        return #FUNC_NAME; /* FUNC_NAME turned into a string literal */ \
+    } \
+    \
+    std::string headers() \
+    { \
+        return "#include <opencl_relational>\n"; \
+    } \
+    \
+    result_type operator()(const input_type& x, const input_type& y) /* host-side reference result for inputs x and y */ \
+    {    \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return perform_function<input_type, input_type, result_type>( \
+            x, y, \
+            [](const SCALAR& a, const SCALAR& b) \
+            { \
+                if(HOST_FUNC_EXPRESSION) /* predicate written in terms of the scalars a and b */ \
+                { \
+                    return cl_int(1); \
+                } \
+                return cl_int(0); \
+            } \
+        ); \
+    } \
+    \
+    bool is_out_bool() \
+    { \
+        return true; \
+    } \
+    \
+    input_type min1() \
+    { \
+        return detail::def_limit<input_type>(-10000.0f); \
+    } \
+    \
+    input_type max1() \
+    { \
+        return detail::def_limit<input_type>(10000.0f); \
+    } \
+    \
+    input_type min2() \
+    { \
+        return detail::def_limit<input_type>(-10000.0f); \
+    } \
+    \
+    input_type max2() \
+    { \
+        return detail::def_limit<input_type>(10000.0f); \
+    } \
+    \
+    std::vector<input_type> in1_special_cases() /* +inf, -inf, quiet NaN, signaling NaN, smallest denormal, +0.0, -0.0 */ \
+    { \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return {  \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(-std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::quiet_NaN()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::signaling_NaN()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::denorm_min()), \
+            detail::make_value<input_type>(0.0f), \
+            detail::make_value<input_type>(-0.0f) \
+        }; \
+    } \
+    \
+    std::vector<input_type> in2_special_cases() /* same list as in1_special_cases() */ \
+    { \
+        typedef typename scalar_type<input_type>::type SCALAR; \
+        return {  \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(-std::numeric_limits<SCALAR>::infinity()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::quiet_NaN()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::signaling_NaN()), \
+            detail::make_value<input_type>(std::numeric_limits<SCALAR>::denorm_min()), \
+            detail::make_value<input_type>(0.0f), \
+            detail::make_value<input_type>(-0.0f) \
+        }; \
+    } \
+DEF_UNARY_TEST_FUNC(test_func_isfinite, isfinite, (std::isfinite)(a)) // arguments appear to be (CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION) - the #define line is not visible here
+DEF_UNARY_TEST_FUNC(test_func_isinf, isinf, (std::isinf)(a)) // the parentheses around the std:: name keep a function-like macro of that name from expanding
+DEF_UNARY_TEST_FUNC(test_func_isnan, isnan, (std::isnan)(a))
+DEF_UNARY_TEST_FUNC(test_func_isnormal, isnormal, (std::isnormal)(a))
+DEF_UNARY_TEST_FUNC(test_func_signbit, signbit , (std::signbit)(a))
+DEF_BINARY_TEST_FUNC(test_func_isordered, isordered, !(std::isunordered)(a, b)) // isordered is expressed as the negation of std::isunordered
+DEF_BINARY_TEST_FUNC(test_func_isunordered, isunordered, (std::isunordered)(a, b))
+template <cl_int N /* Vector size */> // Host-side reference for all(): maps a cl_intN input to a single cl_int via perform_all_function().
+struct test_func_all : public unary_func<
+                                    typename make_vector_type<cl_int, N>::type, /* create cl_intN type */
+                                    cl_int /* scalar cl_int result */
+                                 >
+    typedef typename make_vector_type<cl_int, N>::type input_type;
+    typedef cl_int result_type;
+    std::string str() // name of the function under test
+    {
+        return "all";
+    }
+    std::string headers() // include line for the header that provides the function
+    {
+        return "#include <opencl_relational>\n";
+    }
+    result_type operator()(const input_type& x) // expected result computed on the host
+    {
+        return perform_all_function(x);
+    }
+    bool is_out_bool() // marks the output as boolean-like; how the harness uses this is not visible here
+    {
+        return true;
+    }
+    bool is_in1_bool() // marks the input as boolean-like; how the harness uses this is not visible here
+    {
+        return true;
+    }
+    std::vector<input_type> in1_special_cases() // input values 0, 1 and a non-zero value of each sign
+    {
+        return {
+            detail::make_value<input_type>(0),
+            detail::make_value<input_type>(1),
+            detail::make_value<input_type>(12),
+            detail::make_value<input_type>(-12)
+        };
+    }
+template <cl_int N /* Vector size */> // Host-side reference for any(): maps a cl_intN input to a single cl_int via perform_any_function().
+struct test_func_any : public unary_func<
+                                    typename make_vector_type<cl_int, N>::type, /* create cl_intN type */
+                                    cl_int /* scalar cl_int result */
+                                 >
+    typedef typename make_vector_type<cl_int, N>::type input_type;
+    typedef cl_int result_type;
+    std::string str() // name of the function under test
+    {
+        return "any";
+    }
+    std::string headers() // include line for the header that provides the function
+    {
+        return "#include <opencl_relational>\n";
+    }
+    result_type operator()(const input_type& x) // expected result computed on the host
+    {
+        return perform_any_function(x);
+    }
+    bool is_out_bool() // marks the output as boolean-like; how the harness uses this is not visible here
+    {
+        return true;
+    }
+    bool is_in1_bool() // marks the input as boolean-like; how the harness uses this is not visible here
+    {
+        return true;
+    }
+    std::vector<input_type> in1_special_cases() // input values 0, 1 and a non-zero value of each sign
+    {
+        return {
+            detail::make_value<input_type>(0),
+            detail::make_value<input_type>(1),
+            detail::make_value<input_type>(12),
+            detail::make_value<input_type>(-12)
+        };
+    }
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // NOTE(review): the test-case name/macro that precedes this parameter list is missing from this text
+    int error = CL_SUCCESS; // presumably updated by the macro invocations below - the macros' definitions are not visible here
+    int last_error = CL_SUCCESS;
+// Helper macro, so we don't have to repeat the same code.
+    TEST_UNARY_REL_FUNC_MACRO(test_func_isfinite)
+    TEST_UNARY_REL_FUNC_MACRO(test_func_isinf)
+    TEST_UNARY_REL_FUNC_MACRO(test_func_isnan)
+    TEST_UNARY_REL_FUNC_MACRO(test_func_isnormal)
+    TEST_UNARY_REL_FUNC_MACRO(test_func_signbit)
+// Tests for all(booln x) and any(booln x) are not run in USE_OPENCLC_KERNELS mode,
+// because those functions in OpenCL C require different reference functions on the host
+// compared to their equivalents from OpenCL C++.
+// (In OpenCL C those functions return true/false based on the most significant bits
+// in any/all components of x.)
+    TEST_UNARY_REL_FUNC_MACRO(test_func_all)
+    TEST_UNARY_REL_FUNC_MACRO(test_func_any)
+    log_info("WARNING:\n\tTests for bool all(booln x) are not run in USE_OPENCLC_KERNELS mode\n"); // NOTE(review): presumably guarded by a preprocessor conditional that is not visible in this text - as shown, both warnings are logged even though the all/any tests above run
+    log_info("WARNING:\n\tTests for bool any(booln x) are not run in USE_OPENCLC_KERNELS mode\n");
+    TEST_BINARY_REL_FUNC_MACRO(test_func_isordered)
+    TEST_BINARY_REL_FUNC_MACRO(test_func_isunordered)
+    if(error != CL_SUCCESS) // any non-success aggregate is reported as a plain -1
+    {
+        return -1;
+    }
+    return error;
diff --git a/test_conformance/clcpp/spec_constants/CMakeLists.txt b/test_conformance/clcpp/spec_constants/CMakeLists.txt
new file mode 100644
index 0000000..3488a5a
--- /dev/null
+++ b/test_conformance/clcpp/spec_constants/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/spec_constants/common.hpp b/test_conformance/clcpp/spec_constants/common.hpp
new file mode 100644
index 0000000..3846fe8
--- /dev/null
+++ b/test_conformance/clcpp/spec_constants/common.hpp
@@ -0,0 +1,257 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <algorithm>
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+    last_error = run_spec_constants_test(  \
+        device, context, queue, n_elems, TEST_CLASS \
+    );  \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+// Base class for all tests of cl::spec_constant
+template <class T>
+struct spec_constants_test : public detail::base_func_type<T>
+    // Output buffer type
+    typedef T type;
+    virtual ~spec_constants_test() {};
+    // Returns test name
+    virtual std::string str() = 0;
+    // Returns OpenCL program source
+    virtual std::string generate_program() = 0;
+    // Names of the kernels this test launches, in launch order.
+    // Default: the single kernel reported by get_kernel_name().
+    virtual std::vector<std::string> get_kernel_names()
+    {
+        std::vector<std::string> names;
+        names.push_back(this->get_kernel_name());
+        return names;
+    }
+    // Tells whether clEnqueueNDRangeKernel() must be given an explicit local size.
+    // Default: false, in which case execute() passes NULL as the local size.
+    virtual bool set_local_size()
+    {
+        const bool explicit_local_size = false;
+        return explicit_local_size;
+    }
+    // Computes the largest one-dimensional work-group size usable by every kernel:
+    // starts from work_group_size and lowers it to each kernel's
+    // CL_KERNEL_WORK_GROUP_SIZE. The status of each query is stored in error.
+    virtual size_t get_max_local_size(const std::vector<cl_kernel>& kernels,
+                                      cl_device_id device,
+                                      size_t work_group_size, // default work-group size
+                                      cl_int& error)
+    {
+        size_t limit = work_group_size;
+        for(size_t idx = 0; idx < kernels.size(); ++idx)
+        {
+            size_t kernel_limit;
+            error = clGetKernelWorkGroupInfo(
+                kernels[idx], device, CL_KERNEL_WORK_GROUP_SIZE,
+                sizeof(size_t), &kernel_limit, NULL
+            );
+            RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+            if(kernel_limit < limit)
+            {
+                limit = kernel_limit;
+            }
+        }
+        return limit;
+    }
+    // Hook for setting specialization constants on the program.
+    // Default: nothing to set, reports success.
+    virtual cl_int set_spec_constants(const cl_program& program)
+    {
+        (void) program;
+        const cl_int status = CL_SUCCESS;
+        return status;
+    }
+    // Runs the kernels. This covers the typical case:
+    // 1. each kernel is executed once,
+    // 2. the only argument in every kernel is output_buffer.
+    // work_size is the global size; work_group_size is passed as the local size
+    // only when set_local_size() returns true.
+    // Returns the first failing OpenCL status, or CL_SUCCESS (also when kernels
+    // is empty).
+    virtual cl_int execute(const std::vector<cl_kernel>& kernels,
+                           cl_mem& output_buffer,
+                           cl_command_queue& queue,
+                           size_t work_size,
+                           size_t work_group_size)
+    {
+        // Initialized so an empty kernel list does not return an indeterminate value.
+        cl_int err = CL_SUCCESS;
+        for(auto& k : kernels)
+        {
+            err = clSetKernelArg(k, 0, sizeof(output_buffer), &output_buffer);
+            RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+            err = clEnqueueNDRangeKernel(
+                queue, k, 1,
+                NULL, &work_size, this->set_local_size() ? &work_group_size : NULL,
+                0, NULL, NULL
+            );
+            RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+        }
+        return err;
+    }
+    // Hook for extra queries whose results the test wants to verify.
+    // It is called after the output values have been checked.
+    // Default: no queries, reports success.
+    virtual cl_int check_queries(const std::vector<cl_kernel>& kernels,
+                                 cl_device_id device,
+                                 cl_context context,
+                                 cl_command_queue queue)
+    {
+        // None of the arguments is needed by the default implementation.
+        (void) queue;
+        (void) context;
+        (void) device;
+        (void) kernels;
+        const cl_int status = CL_SUCCESS;
+        return status;
+    }
+// Builds the program generated by `op`, runs its kernels over `count` work-items,
+// reads the output buffer back and compares every element with the host
+// reference value op(i, wg_size); finally calls op.check_queries().
+// Returns CL_SUCCESS on success (and when the output type is not supported by
+// the device), otherwise an error code.
+template <class spec_constants_test>
+int run_spec_constants_test(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, spec_constants_test op)
+{
+    cl_mem buffers[1];
+    cl_program program;
+    std::vector<cl_kernel> kernels;
+    size_t wg_size;
+    size_t work_size[1];
+    cl_int err;
+    typedef typename spec_constants_test::type TYPE;
+    // Don't run test for unsupported types
+    if(!(type_supported<TYPE>(device)))
+    {
+        return CL_SUCCESS;
+    }
+    std::string code_str = op.generate_program();
+    std::vector<std::string> kernel_names = op.get_kernel_names();
+    if(kernel_names.empty())
+    {
+        RETURN_ON_ERROR_MSG(-1, "No kernel to run");
+    }
+    kernels.resize(kernel_names.size());
+    std::string options = "";
+    if(is_extension_available(device, "cl_khr_fp16"))
+    {
+        options += " -cl-fp16-enable";
+    }
+    if(is_extension_available(device, "cl_khr_fp64"))
+    {
+        options += " -cl-fp64-enable";
+    }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], options);
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err);
+    for(size_t i = 1; i < kernels.size(); i++)
+    {
+        kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
+        RETURN_ON_CL_ERROR(err, "clCreateKernel");
+    }
+#else
+    const char * code_c_str = code_str.c_str();
+    err = create_openclcpp_program(context, &program, 1, &(code_c_str), options.c_str());
+    RETURN_ON_ERROR_MSG(err, "Creating OpenCL C++ program failed")
+    // Spec constants have to be set before the program is built
+    err = op.set_spec_constants(program);
+    RETURN_ON_ERROR_MSG(err, "Setting Spec Constants failed")
+    // Build program and create 1st kernel
+    err = build_program_create_kernel_helper(
+        context, &program, &(kernels[0]), 1, &(code_c_str), kernel_names[0].c_str()
+    );
+    RETURN_ON_ERROR_MSG(err, "Unable to build program or to create kernel")
+    // Create other kernels
+    for(size_t i = 1; i < kernels.size(); i++)
+    {
+        kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
+        RETURN_ON_CL_ERROR(err, "clCreateKernel");
+    }
+#endif
+    // Find the max possible wg size among all the kernels
+    wg_size = op.get_max_local_size(kernels, device, 1024, err);
+    RETURN_ON_ERROR(err);
+    work_size[0] = count;
+    if(op.set_local_size())
+    {
+        // Round the global size up to a whole number of work-groups
+        // (integer ceiling division, so <cmath> is not required).
+        const size_t wg_number = (count + wg_size - 1) / wg_size;
+        work_size[0] = wg_number * wg_size;
+    }
+    // host output vector
+    std::vector<TYPE> output = generate_output<TYPE>(work_size[0], 9999);
+    // device output buffer
+    buffers[0] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(TYPE) * output.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    // Execute test; its status must be checked before it is overwritten below
+    err = op.execute(kernels, buffers[0], queue, work_size[0], wg_size);
+    RETURN_ON_ERROR(err);
+    // Blocking read of the device results into the host vector
+    err = clEnqueueReadBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(TYPE) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    // Check output values
+    for(size_t i = 0; i < output.size(); i++)
+    {
+        TYPE v = op(i, wg_size);
+        if(!(are_equal(v, output[i], detail::make_value<TYPE>(0), op)))
+        {
+            RETURN_ON_ERROR_MSG(-1,
+                "test_%s(%s) failed. Expected: %s, got: %s", op.str().c_str(), type_name<TYPE>().c_str(),
+                format_value(v).c_str(), format_value(output[i]).c_str()
+            );
+        }
+    }
+    // Check if queries return correct values
+    err = op.check_queries(kernels, device, context, queue);
+    RETURN_ON_ERROR(err);
+    log_info("test_%s(%s) passed\n", op.str().c_str(), type_name<TYPE>().c_str());
+    clReleaseMemObject(buffers[0]);
+    for(auto& k : kernels)
+        clReleaseKernel(k);
+    clReleaseProgram(program);
+    return err;
+}
diff --git a/test_conformance/clcpp/spec_constants/main.cpp b/test_conformance/clcpp/spec_constants/main.cpp
new file mode 100644
index 0000000..0582ed5
--- /dev/null
+++ b/test_conformance/clcpp/spec_constants/main.cpp
@@ -0,0 +1,26 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "test_spec_consts_attributes.hpp"
+#include "test_spec_consts_if.hpp"
+#include "test_spec_consts_init_vars.hpp"
+// Entry point: hands every test registered in the global autotest suite
+// to the conformance test harness and returns the harness result.
+int main(int argc, const char *argv[])
+{
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    // The harness takes the number of tests followed by the test array itself.
+    return runTestHarness(argc, argv, tests.size(), tests.data(), false, false, 0);
+}
diff --git a/test_conformance/clcpp/spec_constants/test_spec_consts_attributes.hpp b/test_conformance/clcpp/spec_constants/test_spec_consts_attributes.hpp
new file mode 100644
index 0000000..539167f
--- /dev/null
+++ b/test_conformance/clcpp/spec_constants/test_spec_consts_attributes.hpp
@@ -0,0 +1,281 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include "common.hpp"
+// In this test we check if specialization constant can be successfully used
+// in kernel attribute cl::required_work_group_size(X, Y, Z).
+struct spec_const_required_work_group_size_test : public spec_constants_test<cl_uint>
+    // See generate_program() to know what set_spec_constant is for.
+    spec_const_required_work_group_size_test(const bool set_spec_constant,
+                                             const cl_uint work_group_size_0)
+        : m_set_spec_constant(set_spec_constant),
+          m_work_group_size_0(work_group_size_0)
+    {
+    }
+    // Test name: ends with the work-group size when the spec constant is set,
+    // with "not_set" otherwise.
+    std::string str()
+    {
+        if(!m_set_spec_constant)
+        {
+            return "spec_const_in_required_work_group_size_not_set";
+        }
+        const std::string prefix = "spec_const_in_required_work_group_size_";
+        return prefix + std::to_string(m_work_group_size_0);
+    }
+    // This test always requests an explicit local size.
+    bool set_local_size()
+    {
+        const bool explicit_local_size = true;
+        return explicit_local_size;
+    }
+    // Work-group size for the launch: m_work_group_size_0 when the spec constant
+    // is set, 1 otherwise. The arguments are not consulted.
+    size_t get_max_local_size(const std::vector<cl_kernel>& kernels,
+                              cl_device_id device,
+                              size_t work_group_size, // default work-group size
+                              cl_int& error)
+    {
+        (void) kernels;
+        (void) device;
+        (void) work_group_size;
+        (void) error;
+        return m_set_spec_constant ? static_cast<size_t>(m_work_group_size_0)
+                                   : static_cast<size_t>(1);
+    }
+    // Host reference value for output element i: m_work_group_size_0 when the
+    // spec constant is set, 1 otherwise (the same for every element).
+    cl_uint operator()(size_t i, size_t work_group_size)
+    {
+        (void) i;
+        (void) work_group_size;
+        return m_set_spec_constant ? m_work_group_size_0 : cl_uint(1);
+    }
+    // Checks that querying CL_KERNEL_COMPILE_WORK_GROUP_SIZE with
+    // clGetKernelWorkGroupInfo returns the work-group size specified by the
+    // cl::required_work_group_size(X, Y, Z) qualifier: (m_work_group_size_0, 1, 1)
+    // when the spec constant was set, (1, 1, 1) otherwise.
+    // Returns the query's error code if the query fails, -1 on a mismatch,
+    // CL_SUCCESS otherwise.
+    cl_int check_queries(const std::vector<cl_kernel>& kernels,
+                         cl_device_id device,
+                         cl_context context,
+                         cl_command_queue queue)
+    {
+        // Only kernels and device are used; context and queue are not needed.
+        (void) context;
+        (void) queue;
+        size_t compile_wg_size[] = { 1, 1, 1 };
+        cl_int error = clGetKernelWorkGroupInfo(
+            kernels[0], device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
+            sizeof(compile_wg_size), compile_wg_size, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+        // Only dimension 0 depends on the spec constant
+        const size_t expected_wg_size_0 =
+            m_set_spec_constant ? static_cast<size_t>(m_work_group_size_0) : size_t(1);
+        if(compile_wg_size[0] != expected_wg_size_0
+           || compile_wg_size[1] != 1
+           || compile_wg_size[2] != 1)
+        {
+            error = -1;
+        }
+        return error;
+    }
+    // Sets specialization constant 1 to m_work_group_size_0 when this test was
+    // asked to set it; otherwise does nothing and reports success.
+    cl_int set_spec_constants(const cl_program& program)
+    {
+        if(!m_set_spec_constant)
+        {
+            return CL_SUCCESS;
+        }
+        cl_int status = clSetProgramSpecializationConstant(
+            program, cl_uint(1), sizeof(cl_uint), static_cast<void*>(&m_work_group_size_0)
+        );
+        RETURN_ON_CL_ERROR(status, "clSetProgramSpecializationConstant")
+        return status;
+    }
+    // Generates the kernel source: every work-item stores get_local_size(0) in
+    // output[global id]. with_attribute selects whether the kernel carries the
+    // required-work-group-size attribute.
+    std::string generate_program(bool with_attribute)
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            // OpenCL C variant: the work-group size is written into the attribute directly.
+            std::string att = " ";
+            if(with_attribute)
+            {
+                const std::string work_group_size_0 =
+                    m_set_spec_constant ? std::to_string(m_work_group_size_0) : std::string("1");
+                att = "\n__attribute__((reqd_work_group_size(" + work_group_size_0 + ",1,1)))\n";
+            }
+            std::string source = "__kernel" + att + "void ";
+            source += this->get_kernel_name();
+            source +=
+                "(global uint *output)\n"
+                "{\n"
+                "    uint gid = get_global_id(0);\n"
+                "    output[gid] = get_local_size(0);\n"
+                "}\n";
+            return source;
+        #else
+            // OpenCL C++ variant: the attribute refers to spec constant spec1.
+            const std::string att =
+                with_attribute ? "[[cl::required_work_group_size(spec1, 1, 1)]]\n" : "";
+            std::string source =
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_spec_constant>\n"
+                "using namespace cl;\n"
+                "spec_constant<uint, 1> spec1{1};\n";
+            source += att;
+            source += "__kernel void ";
+            source += this->get_kernel_name();
+            source +=
+                "(global_ptr<uint[]> output)\n"
+                "{\n"
+                "    uint gid = get_global_id(0);\n"
+                "    output[gid] = get_local_size(0);\n"
+                "}\n";
+            return source;
+        #endif
+    }
+    // Program source used by the test itself: the variant with the
+    // required-work-group-size attribute.
+    std::string generate_program()
+    {
+        const bool with_attribute = true;
+        return generate_program(with_attribute);
+    }
+    bool m_set_spec_constant;
+    cl_uint m_work_group_size_0;
+// Returns the max work-group size (capped at work_group_size) that can be used
+// for all kernels named in kernel_names once `source` is built.
+// err receives the status of the operation; the returned value is meaningful
+// only when err is CL_SUCCESS.
+// Declared inline because this non-template function is defined in a header.
+inline size_t get_max_wg_size(const std::string& source,
+                              const std::vector<std::string>& kernel_names,
+                              size_t work_group_size, // max wg size we want to have
+                              cl_device_id device,
+                              cl_context context,
+                              cl_command_queue queue,
+                              cl_int& err)
+{
+    (void) queue;
+    cl_program program;
+    std::vector<cl_kernel> kernels;
+    if(kernel_names.empty())
+    {
+        // Callers only inspect err, so the failure has to be reported through it.
+        err = -1;
+        RETURN_ON_ERROR_MSG(err, "No kernel to run");
+    }
+    kernels.resize(kernel_names.size());
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &(kernels[0]), source, kernel_names[0], "-cl-std=CL2.0", false);
+    RETURN_ON_ERROR(err);
+    for(size_t i = 1; i < kernels.size(); i++)
+    {
+        kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
+        RETURN_ON_CL_ERROR(err, "clCreateKernel");
+    }
+#else
+    err = create_opencl_kernel(context, &program, &(kernels[0]), source, kernel_names[0]);
+    RETURN_ON_ERROR(err);
+    for(size_t i = 1; i < kernels.size(); i++)
+    {
+        kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err);
+        RETURN_ON_CL_ERROR(err, "clCreateKernel");
+    }
+#endif
+    // Lower the requested size to the smallest per-kernel limit
+    size_t wg_size = work_group_size;
+    for(auto& k : kernels)
+    {
+        size_t max_wg_size;
+        err = clGetKernelWorkGroupInfo(
+            k, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL
+        );
+        RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+        wg_size = (std::min)(wg_size, max_wg_size);
+    }
+    // The kernels and the program were created only for this query
+    for(auto& k : kernels)
+    {
+        clReleaseKernel(k);
+    }
+    clReleaseProgram(program);
+    return wg_size;
+}
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    // Test-case body: checks a specialization constant used in the
+    // required-work-group-size kernel attribute, for power-of-two sizes up to
+    // max_wg_size and once with the constant left unset.
+    // NOTE(review): the line naming this test case, the macro-invocation lines
+    // that wrap the test objects below, and the preprocessor lines selecting
+    // between the two max_wg_size definitions are not visible here - confirm
+    // against the full file.
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// If ONLY_SPIRV_COMPILATION is defined we can't check the max work-group size for the
+// kernel because OpenCL kernel object is never created in that mode.
+    const size_t max_wg_size = 16;
+    // Get max work-group size that can be used in [[cl::required_work_group_size(X, 1, 1)]]
+    // We do this by building kernel without this attribute and checking what is the max
+    // work-group size we can use with it.
+    auto test = spec_const_required_work_group_size_test(true, 1);
+    const size_t max_wg_size = get_max_wg_size(
+        test.generate_program(false), test.get_kernel_names(),
+        1024, // max wg size we want to test
+        device, context, queue,
+        error
+    );
+    RETURN_ON_ERROR_MSG(error, "Can't get max work-group size");
+    // Run tests when specialization constant spec1 is set (kernel
+    // attribute is [[cl::required_work_group_size(spec1, 1, 1)]]).
+    // Sizes tested: 1, 2, 4, ... up to max_wg_size.
+    for(size_t i = 1; i <= max_wg_size; i *=2)
+    {
+            spec_const_required_work_group_size_test(
+                true, i
+            )
+        );
+    }
+    // This test does not set spec constant
+        spec_const_required_work_group_size_test(
+            false, 9999999 // This value is incorrect, but won't be set and kernel
+                           // attribute should be [[cl::required_work_group_size(1, 1, 1)]]
+        )
+    );
+    // Any non-success status is reported as the generic failure code -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }
+    return error;
diff --git a/test_conformance/clcpp/spec_constants/test_spec_consts_if.hpp b/test_conformance/clcpp/spec_constants/test_spec_consts_if.hpp
new file mode 100644
index 0000000..1c7cec2
--- /dev/null
+++ b/test_conformance/clcpp/spec_constants/test_spec_consts_if.hpp
@@ -0,0 +1,161 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include "common.hpp"
+// This class tests using specialization constant in if statement
+template <class T /* spec constant type*/>
+struct spec_const_in_if_test : public spec_constants_test<cl_uint>
+    // set_spec_constant: when true, set_spec_constants() sets spec1 to the test
+    // value. The test value is chosen from the size of T.
+    spec_const_in_if_test(const bool set_spec_constant)
+        : m_set_spec_constant(set_spec_constant)
+    {
+        static_assert(
+            !is_vector_type<T>::value,
+            "Specialization constant can be only scalar int or float type"
+        );
+        const size_t type_size = sizeof(T);
+        if(type_size == 1)
+        {
+            m_test_value = T(127);
+        }
+        else if(type_size == 2)
+        {
+            m_test_value = T(0xdeadU);
+        }
+        else
+        {
+            // 4- and 8-byte types
+            m_test_value = T(0xdeaddeadU);
+        }
+    }
+    // Test name, suffixed with the spec constant's type name.
+    std::string str()
+    {
+        const std::string prefix = "spec_const_in_if_";
+        return prefix + type_name<T>();
+    }
+    // Host reference value for output element i: the test value when the spec
+    // constant is set, otherwise the element index.
+    cl_uint operator()(size_t i, size_t work_group_size)
+    {
+        (void) work_group_size;
+        return m_set_spec_constant ? m_test_value : static_cast<cl_uint>(i);
+    }
+    // Sets specialization constant 1 to the test value converted to T when this
+    // test was asked to set it; otherwise does nothing and reports success.
+    cl_int set_spec_constants(const cl_program& program)
+    {
+        if(!m_set_spec_constant)
+        {
+            return CL_SUCCESS;
+        }
+        T spec1 = static_cast<T>(m_test_value);
+        cl_int status = clSetProgramSpecializationConstant(
+            program, cl_uint(1), sizeof(T), static_cast<void*>(&spec1)
+        );
+        RETURN_ON_CL_ERROR(status, "clSetProgramSpecializationConstant")
+        return status;
+    }
+    // Generates the kernel source.
+    // When the spec constant is set, each work-item writes the test value to
+    // output[work-item-global-id]; otherwise it writes its global id.
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            // OpenCL C variant: the expected value is written into the source directly.
+            const std::string result =
+                m_set_spec_constant ? std::to_string(m_test_value) : std::string("gid");
+            std::string source = "__kernel void ";
+            source += this->get_kernel_name();
+            source +=
+                "(global uint *output)\n"
+                "{\n"
+                "    uint gid = get_global_id(0);\n"
+                "    output[gid] = " + result + ";\n"
+                "}\n";
+            return source;
+        #else
+            // OpenCL C++ variant: the kernel compares spec1 with the test value.
+            const std::string value = std::to_string(m_test_value);
+            std::string source =
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_spec_constant>\n"
+                "using namespace cl;\n"
+                "typedef ";
+            source += type_name<T>();
+            source +=
+                " TYPE;\n"
+                "spec_constant<TYPE,  1> spec1{TYPE(0)};\n"
+                "__kernel void ";
+            source += this->get_kernel_name();
+            source +=
+                "(global_ptr<uint[]> output)\n"
+                "{\n"
+                "    uint gid = get_global_id(0);\n"
+                "    if(get(spec1) == TYPE(" + value + "))\n"
+                "    {\n"
+                "        output[gid] = " + value + ";\n"
+                "    }\n"
+                "    else\n"
+                "    {\n"
+                "        output[gid] = gid;\n"
+                "    }\n"
+                "}\n";
+            return source;
+        #endif
+    }
+    bool m_set_spec_constant;
+    cl_uint m_test_value;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    // Test-case body: runs spec_const_in_if_test for every scalar type, once
+    // with the spec constant set and once without.
+    // NOTE(review): the line naming this test case is not visible here; the
+    // RUN_SPEC_CONSTANTS_TEST_MACRO body visible in common.hpp stores each result
+    // in last_error and ORs it into error.
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    const std::vector<bool> set_spec_const_options { true, false };
+    for(auto option : set_spec_const_options)
+    {        
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_char>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_uchar>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_int>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_uint>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_long>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_ulong>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_float>(option));
+        // half and double are tested only when the device reports the extension
+        if(is_extension_available(device, "cl_khr_fp16"))
+        {
+            RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_half>(option));
+        }
+        if(is_extension_available(device, "cl_khr_fp64"))
+        {
+            RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test<cl_double>(option));
+        }
+    }
+    // Any non-success status is reported as the generic failure code -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
diff --git a/test_conformance/clcpp/spec_constants/test_spec_consts_init_vars.hpp b/test_conformance/clcpp/spec_constants/test_spec_consts_init_vars.hpp
new file mode 100644
index 0000000..20bbff0
--- /dev/null
+++ b/test_conformance/clcpp/spec_constants/test_spec_consts_init_vars.hpp
@@ -0,0 +1,174 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include "common.hpp"
+// This class tests initializing variables with a specialization constant value.
+template <class T /* spec constant type*/>
+struct spec_const_init_var : public spec_constants_test<cl_uint>
+    // set_spec_constant: when true, set_spec_constants() sets spec1 and spec2 to
+    // the test value. The test value is chosen from the size of T.
+    spec_const_init_var(const bool set_spec_constant)
+        : m_set_spec_constant(set_spec_constant)
+    {
+        static_assert(
+            !is_vector_type<T>::value,
+            "Specialization constant can be only scalar int or float type"
+        );
+        const size_t type_size = sizeof(T);
+        if(type_size == 1)
+        {
+            m_test_value = T(127);
+        }
+        else if(type_size == 2)
+        {
+            m_test_value = T(0xdeadU);
+        }
+        else
+        {
+            // 4- and 8-byte types
+            m_test_value = T(0xdeaddeadU);
+        }
+    }
+    // Test name, suffixed with the spec constant's type name.
+    std::string str()
+    {
+        const std::string prefix = "spec_const_init_var_";
+        return prefix + type_name<T>();
+    }
+    // Host reference value for output element i: the test value when the spec
+    // constants are set, otherwise the element index.
+    cl_uint operator()(size_t i, size_t work_group_size)
+    {
+        (void) work_group_size;
+        return m_set_spec_constant ? m_test_value : static_cast<cl_uint>(i);
+    }
+    // Sets specialization constants 1 and 2 to the test value converted to T when
+    // this test was asked to set them; otherwise does nothing and reports success.
+    cl_int set_spec_constants(const cl_program& program)
+    {
+        if(!m_set_spec_constant)
+        {
+            return CL_SUCCESS;
+        }
+        T spec = static_cast<T>(m_test_value);
+        // spec1
+        cl_int status = clSetProgramSpecializationConstant(
+            program, cl_uint(1), sizeof(T), static_cast<void*>(&spec)
+        );
+        RETURN_ON_CL_ERROR(status, "clSetProgramSpecializationConstant")
+        // spec2
+        status = clSetProgramSpecializationConstant(
+            program, cl_uint(2), sizeof(T), static_cast<void*>(&spec)
+        );
+        RETURN_ON_CL_ERROR(status, "clSetProgramSpecializationConstant")
+        return status;
+    }
+    // Generates the kernel source.
+    // When the spec constants are set, each work-item writes the test value to
+    // output[work-item-global-id]; otherwise it writes its global id.
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            // OpenCL C variant: the expected value is written into the source directly.
+            const std::string result =
+                m_set_spec_constant ? std::to_string(m_test_value) : std::string("gid");
+            std::string source = "__kernel void ";
+            source += this->get_kernel_name();
+            source +=
+                "(global uint *output)\n"
+                "{\n"
+                "    uint gid = get_global_id(0);\n"
+                "    output[gid] = " + result + ";\n"
+                "}\n";
+            return source;
+        #else
+            // OpenCL C++ variant: three variables are initialized from the spec
+            // constants in different ways and compared with the test value.
+            const std::string value = std::to_string(m_test_value);
+            std::string source =
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_spec_constant>\n"
+                "using namespace cl;\n"
+                "typedef ";
+            source += type_name<T>();
+            source +=
+                " TYPE;\n"
+                "spec_constant<TYPE,  1> spec1{TYPE(0)};\n"
+                "spec_constant<TYPE,  2> spec2{TYPE(0)};\n"
+                "__kernel void ";
+            source += this->get_kernel_name();
+            source +=
+                "(global_ptr<uint[]> output)\n"
+                "{\n"
+                "    uint gid = get_global_id(0);\n"
+                "    TYPE var1(spec1.get());\n"
+                "    TYPE var2(spec2);\n"
+                "    TYPE var3; var3 = spec2;\n"
+                "    if((var1 == TYPE(" + value + ")) "
+                       "&& (var2 == TYPE(" + value + "))\n"
+                       "&& (var3 == TYPE(" + value + ")))\n"
+                "    {\n"
+                "        output[gid] = " + value + ";\n"
+                "    }\n"
+                "    else\n"
+                "    {\n"
+                "        output[gid] = gid;\n"
+                "    }\n"
+                "}\n";
+            return source;
+        #endif
+    }
+    bool m_set_spec_constant;
+    cl_uint m_test_value;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    // Test-case body: runs spec_const_init_var for every scalar type, once with
+    // the spec constants set and once without.
+    // NOTE(review): the line naming this test case is not visible here; the
+    // RUN_SPEC_CONSTANTS_TEST_MACRO body visible in common.hpp stores each result
+    // in last_error and ORs it into error.
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    const std::vector<bool> set_spec_const_options { true, false };
+    for(auto option : set_spec_const_options)
+    {        
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_char>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_uchar>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_int>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_uint>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_long>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_ulong>(option));
+        RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_float>(option));
+        // half and double are tested only when the device reports the extension
+        if(is_extension_available(device, "cl_khr_fp16"))
+        {
+            RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_half>(option));
+        }
+        if(is_extension_available(device, "cl_khr_fp64"))
+        {
+            RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var<cl_double>(option));
+        }
+    }
+    // Any non-success status is reported as the generic failure code -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }    
+    return error;
diff --git a/test_conformance/clcpp/spirv10_2016.04.27.7z b/test_conformance/clcpp/spirv10_2016.04.27.7z
new file mode 100644
index 0000000..306be24
--- /dev/null
+++ b/test_conformance/clcpp/spirv10_2016.04.27.7z
@@ -0,0 +1,3 @@
+oid sha256:fe4f34d616ed7ef70e870c22078f60655f68b0c5191c8d8b9d045dd0e7390bc2
+size 5529152
diff --git a/test_conformance/clcpp/subgroups/CMakeLists.txt b/test_conformance/clcpp/subgroups/CMakeLists.txt
new file mode 100644
index 0000000..c8307d2
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/subgroups/common.hpp b/test_conformance/clcpp/subgroups/common.hpp
new file mode 100644
index 0000000..2b05a3c
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/common.hpp
@@ -0,0 +1,97 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <string>
+#include <vector>
+#include <limits>
+enum class work_group_op : int { // Operator selector used as a template argument by the generators and kernel-source builders in these tests.
+    add, min, max
+std::string to_string(work_group_op op) // Returns the lowercase name of op ("add", "min" or "max"); any other value yields "".
+    switch (op)
+    {
+        case work_group_op::add:
+            return "add";
+        case work_group_op::min:
+            return "min";
+        case work_group_op::max:
+            return "max";
+        default:
+            break; // Unknown enumerator value: fall out to the empty-string return below.
+    }
+    return "";
+template <class CL_INT_TYPE, work_group_op op>
+std::vector<CL_INT_TYPE> generate_input(size_t count, size_t wg_size) // Builds count input values whose pattern depends on op; wg_size is the repeat period for min/max.
+    std::vector<CL_INT_TYPE> input(count, CL_INT_TYPE(1)); // Every element starts as 1.
+    switch (op)
+    {
+        case work_group_op::add:
+            return input; // add: the all-ones vector is returned unchanged.
+        case work_group_op::min:
+            {
+                size_t j = wg_size; // min: values count down wg_size, wg_size-1, ..., 1 and then repeat.
+                for(size_t i = 0; i < count; i++)
+                {
+                    input[i] = static_cast<CL_INT_TYPE>(j);
+                    j--;
+                    if(j == 0)
+                    {
+                        j = wg_size;
+                    }
+                }
+            }
+            break;
+        case work_group_op::max:
+            {
+                size_t j = 0; // max: values count up 0, 1, ..., wg_size-1 and then repeat.
+                for(size_t i = 0; i < count; i++)
+                {
+                    input[i] = static_cast<CL_INT_TYPE>(j);
+                    j++;
+                    if(j == wg_size)
+                    {
+                        j = 0;
+                    }
+                }
+            } // No break here: this is the last case, so control simply leaves the switch.
+    }
+    return input;
+template <class CL_INT_TYPE, work_group_op op>
+std::vector<CL_INT_TYPE> generate_output(size_t count, size_t wg_size) // Builds the initial contents of the output buffer: count copies of a per-operator value; wg_size is not used.
+    switch (op)
+    {
+        case work_group_op::add:
+            return std::vector<CL_INT_TYPE>(count, CL_INT_TYPE(0)); // add: filled with 0.
+        case work_group_op::min:
+            return std::vector<CL_INT_TYPE>(count, (std::numeric_limits<CL_INT_TYPE>::max)()); // min: filled with the largest value; the extra parentheses presumably guard against a max() macro (TODO confirm).
+        case work_group_op::max:
+            return std::vector<CL_INT_TYPE>(count, (std::numeric_limits<CL_INT_TYPE>::min)()); // max: filled with the smallest value.
+    }
+    return std::vector<CL_INT_TYPE>(count, CL_INT_TYPE(0)); // Fallback for any other operator value.
diff --git a/test_conformance/clcpp/subgroups/main.cpp b/test_conformance/clcpp/subgroups/main.cpp
new file mode 100644
index 0000000..c026228
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/main.cpp
@@ -0,0 +1,29 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "test_sg_all.hpp"
+#include "test_sg_any.hpp"
+#include "test_sg_broadcast.hpp"
+#include "test_sg_reduce.hpp"
+#include "test_sg_scan_inclusive.hpp"
+#include "test_sg_scan_exclusive.hpp"
+int main(int argc, const char *argv[]) // Entry point: passes the globally registered test definitions to runTestHarness and returns its result.
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(argc, argv, tests.size(),, false, false, 0); // NOTE(review): the argument between the two commas after tests.size() is missing in this excerpt.
diff --git a/test_conformance/clcpp/subgroups/test_sg_all.hpp b/test_conformance/clcpp/subgroups/test_sg_all.hpp
new file mode 100644
index 0000000..01d6638
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/test_sg_all.hpp
@@ -0,0 +1,219 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <limits>
+#include <algorithm>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of sub-group functions
+#include "common.hpp"
+std::string generate_sg_all_kernel_code() // Returns the source text of the test_sg_all kernel as a single string.
+    return "#include <opencl_memory>\n"
+           "#include <opencl_work_item>\n"
+           "#include <opencl_work_group>\n"
+           "using namespace cl;\n"
+           "__kernel void test_sg_all(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+           "{\n"
+           "    ulong tid = get_global_id(0);\n"
+           "    bool result = sub_group_all(input[tid] < input[tid+1]);\n" // The predicate reads one element past tid, so input needs one extra element.
+           "    if(!result) {\n        output[tid] = 0;\n        return;\n    }\n" // Work-item stores 0 when the sub-group vote is false...
+           "    output[tid] = 1;\n" // ...and 1 when it is true.
+           "}\n";
+int verify_sg_all(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size, size_t sg_size) // Recomputes the expected vote per sub-group on the host and compares it with out; returns CL_SUCCESS or -1 on the first mismatch.
+    size_t i, j, k;
+    for (i = 0; i < count; i += wg_size) // i: first element of each work-group.
+    {
+        for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size) // j: offset of each sub-group inside the work-group.
+        {
+            // sub-group all
+            bool all = true;
+            for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) // The last sub-group of a work-group may be shorter than sg_size.
+            {
+                if(!(in[i+j+k] < in[i+j+k+1])) // Same predicate as the kernel; reads one element past the current index.
+                {
+                    all = false;
+                    break;
+                }
+            }
+            // Convert bool to uint
+            cl_uint all_uint = all ? 1 : 0;
+            // Check if all work-items in sub-group stored correct value
+            for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
+            {
+                if (all_uint != out[i + j + k])
+                {
+                    log_info(
+                        "sub_group_all %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<cl_uint>().c_str(),
+                        i + j, // Reported position is the start of the sub-group, not the mismatching element.
+                        static_cast<size_t>(all_uint),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+std::vector<cl_uint> generate_input_sg_all(size_t count, size_t wg_size) // Builds count values equal to their index, with one duplicated neighbour in each work-group of the second half.
+    std::vector<cl_uint> input(count, cl_uint(0));
+    size_t j = wg_size; // Counts down from wg_size to 1 within each work-group.
+    for(size_t i = 0; i < count; i++)
+    {
+        input[i] = static_cast<cl_uint>(i);
+        // At one position in each work-group of the second half of the input,
+        // input[i] duplicates input[i-1], so (input[tid] < input[tid+1]) is false for
+        // tid == i-1 and the sub-group vote computed over that element is false.
+        if((j == wg_size/2) && (i > count/2))
+        {
+            input[i] = input[i - 1];
+        }
+        j--;
+        if(j == 0)
+        {
+            j = wg_size;
+        }
+    }
+    return input;
+std::vector<cl_uint> generate_output_sg_all(size_t count, size_t wg_size) // Returns the initial output contents: count elements, all set to 1.
+    (void) wg_size; // wg_size is deliberately unused.
+    return std::vector<cl_uint>(count, cl_uint(1));
+int sub_group_all(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) // Builds and runs the test_sg_all kernel over at least count work-items, then verifies the output on the host.
+    cl_mem buffers[2]; // [0] = input, [1] = output.
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t sg_max_size;
+    size_t work_size[1];
+    int err;
+    std::string code_str = generate_sg_all_kernel_code();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_all"); // NOTE(review): the preprocessor line opening this branch is not visible in this excerpt; only the #elif below is.
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
+    return CL_SUCCESS;
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_all"); // Presumably the regular (non-development) path starts here - TODO confirm against the full file.
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); // wg_size becomes the local size used for the launch.
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+    size_t param_value_size = 0;
+    err = clGetKernelSubGroupInfo( // Queries the maximum sub-group size for a 1-D local size of wg_size.
+        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+        sizeof(size_t), static_cast<void*>(&wg_size),
+        sizeof(size_t), static_cast<void*>(&sg_max_size),
+        &param_value_size
+    );
+    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
+    // Verify size of returned param
+    if(param_value_size != sizeof(size_t))
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
+            sizeof(size_t),
+            param_value_size
+        )
+    }
+    // Calculate global work size
+    size_t flat_work_size;
+    size_t wg_number = static_cast<size_t>( // Number of work-groups needed to cover count, rounded up.
+        std::ceil(static_cast<double>(count) / wg_size)
+    );
+    flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+    std::vector<cl_uint> input = generate_input_sg_all(flat_work_size + 1, wg_size); // One extra element because the kernel reads input[tid+1].
+    std::vector<cl_uint> output = generate_output_sg_all(flat_work_size, wg_size);
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the operand of this cast is missing in this excerpt.
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); // Both results are OR-ed so a single check covers the two calls.
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the operand of this cast is missing in this excerpt.
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    if (verify_sg_all(input, output, flat_work_size, wg_size, sg_max_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "sub_group_all failed"); // Presumably returns from the function, in which case the releases below are skipped - TODO confirm.
+    }
+    log_info("sub_group_all passed\n");
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // Parameter list of a test case; the line that names it is not visible in this excerpt.
+    int err = CL_SUCCESS;
+    err = sub_group_all(device, context, queue, n_elems); // n_elems is passed as the requested element count.
+    CHECK_ERROR(err)
+    return err;
diff --git a/test_conformance/clcpp/subgroups/test_sg_any.hpp b/test_conformance/clcpp/subgroups/test_sg_any.hpp
new file mode 100644
index 0000000..769bef0
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/test_sg_any.hpp
@@ -0,0 +1,219 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <limits>
+#include <algorithm>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of sub-group functions
+#include "common.hpp"
+std::string generate_sg_any_kernel_code() // Returns the source text of the test_sg_any kernel as a single string.
+    return "#include <opencl_memory>\n"
+           "#include <opencl_work_item>\n"
+           "#include <opencl_work_group>\n"
+           "using namespace cl;\n"
+           "__kernel void test_sg_any(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+           "{\n"
+           "    ulong tid = get_global_id(0);\n"
+           "    bool result = sub_group_any(input[tid] == input[tid+1]);\n" // The predicate reads one element past tid, so input needs one extra element.
+           "    if(!result) {\n        output[tid] = 0;\n        return;\n    }\n" // Work-item stores 0 when the sub-group vote is false...
+           "    output[tid] = 1;\n" // ...and 1 when it is true.
+           "}\n";
+int verify_sg_any(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size, size_t sg_size) // Recomputes the expected vote per sub-group on the host and compares it with out; returns CL_SUCCESS or -1 on the first mismatch.
+    size_t i, j, k;
+    for (i = 0; i < count; i += wg_size) // i: first element of each work-group.
+    {
+        for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size) // j: offset of each sub-group inside the work-group.
+        {
+            // sub-group any
+            bool any = false;
+            for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) // The last sub-group of a work-group may be shorter than sg_size.
+            {
+                if(in[i+j+k] == in[i+j+k+1]) // Same predicate as the kernel; reads one element past the current index.
+                {
+                    any = true;
+                    break;
+                }
+            }
+            // Convert bool to uint
+            cl_uint any_uint = any ? 1 : 0;
+            // Check if all work-items in sub-group stored correct value
+            for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
+            {
+                if (any_uint != out[i + j + k])
+                {
+                    log_info(
+                        "sub_group_any %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<cl_uint>().c_str(),
+                        i + j, // Reported position is the start of the sub-group, not the mismatching element.
+                        static_cast<size_t>(any_uint),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+std::vector<cl_uint> generate_input_sg_any(size_t count, size_t wg_size) // Builds count values equal to their index, with one duplicated neighbour in each work-group of the second half.
+    std::vector<cl_uint> input(count, cl_uint(0));
+    size_t j = wg_size; // Counts down from wg_size to 1 within each work-group.
+    for(size_t i = 0; i < count; i++)
+    {
+        input[i] = static_cast<cl_uint>(i);
+        // At one position in each work-group of the second half of the input,
+        // input[i] duplicates input[i-1], so (input[tid] == input[tid+1]) is true for
+        // tid == i-1 and the sub-group vote computed over that element is true.
+        if((j == wg_size/2) && (i > count/2))
+        {
+            input[i] = input[i - 1];
+        }
+        j--;
+        if(j == 0)
+        {
+            j = wg_size;
+        }
+    }
+    return input;
+std::vector<cl_uint> generate_output_sg_any(size_t count, size_t wg_size) // Returns the initial output contents: count elements, all set to 1.
+    (void) wg_size; // wg_size is deliberately unused.
+    return std::vector<cl_uint>(count, cl_uint(1));
+int sub_group_any(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) // Builds and runs the test_sg_any kernel over at least count work-items, then verifies the output on the host.
+    cl_mem buffers[2]; // [0] = input, [1] = output.
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t sg_max_size;
+    size_t work_size[1];
+    int err;
+    std::string code_str = generate_sg_any_kernel_code();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_any"); // NOTE(review): the preprocessor line opening this branch is not visible in this excerpt; only the #elif below is.
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
+    return CL_SUCCESS;
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_any"); // Presumably the regular (non-development) path starts here - TODO confirm against the full file.
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); // wg_size becomes the local size used for the launch.
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+    size_t param_value_size = 0;
+    err = clGetKernelSubGroupInfo( // Queries the maximum sub-group size for a 1-D local size of wg_size.
+        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+        sizeof(size_t), static_cast<void*>(&wg_size),
+        sizeof(size_t), static_cast<void*>(&sg_max_size),
+        &param_value_size
+    );
+    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
+    // Verify size of returned param
+    if(param_value_size != sizeof(size_t))
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
+            sizeof(size_t),
+            param_value_size
+        )
+    }
+    // Calculate global work size
+    size_t flat_work_size;
+    size_t wg_number = static_cast<size_t>( // Number of work-groups needed to cover count, rounded up.
+        std::ceil(static_cast<double>(count) / wg_size)
+    );
+    flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+    std::vector<cl_uint> input = generate_input_sg_any(flat_work_size + 1, wg_size); // One extra element because the kernel reads input[tid+1].
+    std::vector<cl_uint> output = generate_output_sg_any(flat_work_size, wg_size);
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the operand of this cast is missing in this excerpt.
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); // Both results are OR-ed so a single check covers the two calls.
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the operand of this cast is missing in this excerpt.
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    if (verify_sg_any(input, output, flat_work_size, wg_size, sg_max_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "sub_group_any failed"); // Presumably returns from the function, in which case the releases below are skipped - TODO confirm.
+    }
+    log_info("sub_group_any passed\n");
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // Parameter list of a test case; the line that names it is not visible in this excerpt.
+    int err = CL_SUCCESS;
+    err = sub_group_any(device, context, queue, n_elems); // n_elems is passed as the requested element count.
+    CHECK_ERROR(err)
+    return err;
diff --git a/test_conformance/clcpp/subgroups/test_sg_broadcast.hpp b/test_conformance/clcpp/subgroups/test_sg_broadcast.hpp
new file mode 100644
index 0000000..39e420a
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/test_sg_broadcast.hpp
@@ -0,0 +1,204 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <limits>
+#include <algorithm>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of sub-group functions
+#include "common.hpp"
+std::string generate_sg_broadcast_kernel_code() // Returns the source text of the test_sg_broadcast kernel as a single string.
+    return
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "#include <opencl_work_group>\n"
+        "using namespace cl;\n"
+        "__kernel void test_sg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+        "{\n"
+        "    ulong tid = get_global_id(0);\n"
+        "    uint result = sub_group_broadcast(input[tid], 0);\n" // Broadcast source is sub-group local id 0.
+        "    output[tid] = result;\n"
+        "}\n";
+verify_sg_broadcast(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size, size_t sg_size) // Checks that every work-item of each sub-group stored the sub-group's first input element; returns CL_SUCCESS or -1 on the first mismatch.
+    size_t i, j, k;
+    for (i = 0; i < count; i += wg_size) // i: first element of each work-group.
+    {
+        for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size) // j: offset of each sub-group inside the work-group.
+        {
+            // sub-group broadcast
+            cl_uint broadcast_result = in[i+j]; // Expected value: the input of the sub-group's first work-item.
+            // Check if all work-items in sub-group stored correct value
+            for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) // The last sub-group of a work-group may be shorter than sg_size.
+            {
+                if (broadcast_result != out[i + j + k])
+                {
+                    log_info(
+                        "sub_group_broadcast %s: Error at %lu: expected = %lu, got = %lu\n", // Label names this test (it previously said sub_group_any).
+                        type_name<cl_uint>().c_str(),
+                        i + j, // Reported position is the start of the sub-group, not the mismatching element.
+                        static_cast<size_t>(broadcast_result),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+std::vector<cl_uint> generate_input_sg_broadcast(size_t count, size_t wg_size) // Builds count values that count down wg_size, wg_size-1, ..., 1 and then repeat.
+    std::vector<cl_uint> input(count, cl_uint(0));
+    size_t j = wg_size;
+    for(size_t i = 0; i < count; i++)
+    {
+        input[i] = static_cast<cl_uint>(j);
+        j--;
+        if(j == 0) // Restart the countdown at each work-group boundary.
+        {
+            j = wg_size;
+        }
+    }
+    return input;
+std::vector<cl_uint> generate_output_sg_broadcast(size_t count, size_t wg_size) // Returns the initial output contents: count elements, all set to 1.
+    (void) wg_size; // wg_size is deliberately unused.
+    return std::vector<cl_uint>(count, cl_uint(1));
+int sub_group_broadcast(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) // Builds and runs the test_sg_broadcast kernel over at least count work-items, then verifies the output on the host.
+    cl_mem buffers[2]; // [0] = input, [1] = output.
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t sg_max_size;
+    size_t work_size[] = { 1 };
+    int err;
+    // Get kernel source code
+    std::string code_str = generate_sg_broadcast_kernel_code();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_broadcast"); // NOTE(review): the preprocessor line opening this branch is not visible in this excerpt; only the #elif below is.
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
+    return CL_SUCCESS;
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_broadcast"); // Presumably the regular (non-development) path starts here - TODO confirm against the full file.
+    // Get max flat workgroup size
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+    size_t param_value_size = 0;
+    err = clGetKernelSubGroupInfo( // Queries the maximum sub-group size for a 1-D local size of wg_size.
+        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+        sizeof(size_t), static_cast<void*>(&wg_size),
+        sizeof(size_t), static_cast<void*>(&sg_max_size),
+        &param_value_size
+    );
+    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
+    // Verify size of returned param
+    if(param_value_size != sizeof(size_t))
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
+            sizeof(size_t),
+            param_value_size
+        )
+    }
+    // Calculate global work size
+    size_t flat_work_size = count; // Overwritten below with count rounded up to a whole number of work-groups.
+    size_t wg_number = static_cast<size_t>(
+        std::ceil(static_cast<double>(count) / wg_size)
+    );
+    flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+    std::vector<cl_uint> input = generate_input_sg_broadcast(flat_work_size, wg_size);
+    std::vector<cl_uint> output = generate_output_sg_broadcast(flat_work_size, wg_size);
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL,&err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the operand of this cast is missing in this excerpt.
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); // Both results are OR-ed so a single check covers the two calls.
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the operand of this cast is missing in this excerpt.
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    int result = verify_sg_broadcast( input, output, work_size[0], wg_size, sg_max_size);
+    RETURN_ON_ERROR_MSG(result, "sub_group_broadcast failed") // Presumably returns when result is non-zero, in which case the releases below are skipped - TODO confirm.
+    log_info("sub_group_broadcast passed\n");
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // Parameter list of a test case; the line that names it is not visible in this excerpt.
+    int err = CL_SUCCESS;
+    err = sub_group_broadcast(device, context, queue, n_elems); // n_elems is passed as the requested element count.
+    CHECK_ERROR(err)
+    return err;
diff --git a/test_conformance/clcpp/subgroups/test_sg_reduce.hpp b/test_conformance/clcpp/subgroups/test_sg_reduce.hpp
new file mode 100644
index 0000000..6b20d50
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/test_sg_reduce.hpp
@@ -0,0 +1,345 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <limits>
+#include <algorithm>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of sub-group functions
+#include "common.hpp"
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_sg_reduce_kernel_code()
+    // Assembles the OpenCL C++ source of the test_sg_reduce kernel: each
+    // work-item passes input[tid] to sub_group_reduce<op> and stores the
+    // returned value in output[tid].
+    const std::string elem_type = type_name<CL_INT_TYPE>();
+    const std::string op_name = to_string(op);
+    std::string source;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "#include <opencl_work_group>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_sg_reduce(global_ptr<" + elem_type + "[]> input, ";
+    source += "global_ptr<" + elem_type + "[]> output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "    " + elem_type + " result = sub_group_reduce<work_group_op::" + op_name + ">(input[tid]);\n";
+    source += "    output[tid] = result;\n";
+    source += "}\n";
+    return source;
+template <class CL_INT_TYPE>
+int verify_sg_reduce_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+    // Host-side check of sub_group_reduce<add> results.
+    // `in` is walked in work-groups of wg_size elements, each split into
+    // sub-groups of at most sg_size elements; every element of `out` must
+    // equal the sum of the inputs of its sub-group.
+    // Returns CL_SUCCESS when all elements match, -1 at the first mismatch.
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // Elements of this work-group that are actually present in `in`.
+        const size_t wg_items = std::min<size_t>(wg_size, in.size() - i);
+        for (size_t j = 0; j < wg_items; j += sg_size)
+        {
+            // Clamp to the elements that remain, so a trailing partial
+            // work-group or sub-group is never read past the end of `in`.
+            const size_t sg_items = std::min<size_t>(sg_size, wg_items - j);
+            CL_INT_TYPE sum = 0;
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                sum += in[i + j + k];
+            }
+            // Check if all work-items in sub-group stored correct value
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                if (sum != out[i + j + k])
+                {
+                    // Report the element type under test, not a fixed cl_uint.
+                    log_info(
+                        "sub_group_reduce_add %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j,
+                        static_cast<size_t>(sum),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+template <class CL_INT_TYPE>
+int verify_sg_reduce_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+    // Host-side check of sub_group_reduce<min> results.
+    // `in` is walked in work-groups of wg_size elements, each split into
+    // sub-groups of at most sg_size elements; every element of `out` must
+    // equal the smallest input of its sub-group.
+    // Returns CL_SUCCESS when all elements match, -1 at the first mismatch.
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // Elements of this work-group that are actually present in `in`.
+        const size_t wg_items = std::min<size_t>(wg_size, in.size() - i);
+        for (size_t j = 0; j < wg_items; j += sg_size)
+        {
+            // Clamp to the elements that remain, so a trailing partial
+            // work-group or sub-group is never read past the end of `in`.
+            const size_t sg_items = std::min<size_t>(sg_size, wg_items - j);
+            // Start from the largest representable value (identity of min).
+            CL_INT_TYPE expected = (std::numeric_limits<CL_INT_TYPE>::max)();
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                expected = std::min<CL_INT_TYPE>(expected, in[i + j + k]);
+            }
+            // Check if all work-items in sub-group stored correct value
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                if (expected != out[i + j + k])
+                {
+                    // Report the element type under test, not a fixed cl_uint.
+                    log_info(
+                        "sub_group_reduce_min %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j,
+                        static_cast<size_t>(expected),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+template <class CL_INT_TYPE>
+int verify_sg_reduce_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+    // Host-side check of sub_group_reduce<max> results.
+    // `in` is walked in work-groups of wg_size elements, each split into
+    // sub-groups of at most sg_size elements; every element of `out` must
+    // equal the largest input of its sub-group.
+    // Returns CL_SUCCESS when all elements match, -1 at the first mismatch.
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // Elements of this work-group that are actually present in `in`.
+        const size_t wg_items = std::min<size_t>(wg_size, in.size() - i);
+        for (size_t j = 0; j < wg_items; j += sg_size)
+        {
+            // Clamp to the elements that remain, so a trailing partial
+            // work-group or sub-group is never read past the end of `in`.
+            const size_t sg_items = std::min<size_t>(sg_size, wg_items - j);
+            // Start from the smallest representable value (identity of max).
+            CL_INT_TYPE expected = (std::numeric_limits<CL_INT_TYPE>::min)();
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                expected = std::max<CL_INT_TYPE>(expected, in[i + j + k]);
+            }
+            // Check if all work-items in sub-group stored correct value
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                if (expected != out[i + j + k])
+                {
+                    // Report the element type under test, not a fixed cl_uint.
+                    log_info(
+                        "sub_group_reduce_max %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j,
+                        static_cast<size_t>(expected),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+template <class CL_INT_TYPE, work_group_op op>
+int verify_sg_reduce(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+    // Forwards to the host-side checker that matches the compile-time
+    // operator `op`; any operator other than add/min/max yields -1.
+    if (op == work_group_op::add)
+    {
+        return verify_sg_reduce_add(in, out, wg_size, sg_size);
+    }
+    if (op == work_group_op::min)
+    {
+        return verify_sg_reduce_min(in, out, wg_size, sg_size);
+    }
+    if (op == work_group_op::max)
+    {
+        return verify_sg_reduce_max(in, out, wg_size, sg_size);
+    }
+    return -1;
+template <class CL_INT_TYPE, work_group_op op>
+int sub_group_reduce(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) // Builds the test_sg_reduce kernel for CL_INT_TYPE/op, runs it over at least `count` work-items and verifies the output on the host.
+    // don't run test for unsupported types
+    if(!type_supported<CL_INT_TYPE>(device))
+    {
+        return CL_SUCCESS;
+    }
+    cl_mem buffers[2]; // [0] = kernel input, [1] = kernel output (see clSetKernelArg calls below)
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t sg_max_size;
+    size_t work_size[1];
+    int err;
+    std::string code_str = generate_sg_reduce_kernel_code<CL_INT_TYPE, op>();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_reduce"); // NOTE(review): the preprocessor condition opening this branch is not visible in this chunk - confirm against the original file.
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
+    return CL_SUCCESS;
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_reduce"); // NOTE(review): as shown, this result is overwritten by the next call without being checked - confirm.
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); // wg_size is later used as the local work size.
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+    size_t param_value_size = 0;
+    err = clGetKernelSubGroupInfo( // Query the maximum sub-group size for an ND-range whose local size is wg_size.
+        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+        sizeof(size_t), static_cast<void*>(&wg_size),
+        sizeof(size_t), static_cast<void*>(&sg_max_size),
+        &param_value_size
+    );
+    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
+    // Verify size of returned param
+    if(param_value_size != sizeof(size_t))
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
+            sizeof(size_t),
+            param_value_size
+        )
+    }
+    // Calculate global work size
+    size_t flat_work_size;
+    size_t wg_number = static_cast<size_t>( // Number of work-groups: count / wg_size rounded up.
+        std::ceil(static_cast<double>(count) / wg_size)
+    );
+    flat_work_size = wg_number * wg_size; // Global size is a whole number of work-groups.
+    work_size[0] = flat_work_size;
+    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    err = clEnqueueWriteBuffer( // Blocking write (CL_TRUE) into the input buffer.
+        queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the cast operand is missing here; presumably the host storage of `input` - confirm.
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); // 1-D launch: global size work_size[0], local size wg_size.
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+    err = clEnqueueReadBuffer( // Blocking read (CL_TRUE) of the output buffer.
+        queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the cast operand is missing here; presumably the host storage of `output` - confirm.
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    if (verify_sg_reduce<CL_INT_TYPE, op>(input, output, wg_size, sg_max_size) != CL_SUCCESS) // Host check uses sg_max_size as the sub-group size.
+    {
+        RETURN_ON_ERROR_MSG(-1, "sub_group_reduce_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    }
+    log_info("sub_group_reduce_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    clReleaseMemObject(buffers[0]); // NOTE(review): if the RETURN_ON_* macros above return early (as their names suggest - confirm), these releases are skipped on failure paths.
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // NOTE(review): the line that names this test case is not visible in this chunk - confirm against the original file.
+    int error = CL_SUCCESS; // OR-accumulated status of all runs below.
+    int local_error = CL_SUCCESS; // Status of the most recent run.
+    local_error = sub_group_reduce<cl_int, work_group_op::add>(device, context, queue, n_elems); // add-reduction over cl_int
+    CHECK_ERROR(local_error) // Macro defined outside this chunk; presumably reports a failure - TODO confirm.
+    error |= local_error;
+    local_error = sub_group_reduce<cl_uint, work_group_op::add>(device, context, queue, n_elems); // add-reduction over cl_uint
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_reduce<cl_long, work_group_op::add>(device, context, queue, n_elems); // add-reduction over cl_long
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_reduce<cl_ulong, work_group_op::add>(device, context, queue, n_elems); // add-reduction over cl_ulong
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    if(error != CL_SUCCESS) // Any accumulated failure is collapsed into -1.
+        return -1;
+    return CL_SUCCESS;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // NOTE(review): the line that names this test case is not visible in this chunk - confirm against the original file.
+    int error = CL_SUCCESS; // OR-accumulated status of all runs below.
+    int local_error = CL_SUCCESS; // Status of the most recent run.
+    local_error = sub_group_reduce<cl_int, work_group_op::min>(device, context, queue, n_elems); // min-reduction over cl_int
+    CHECK_ERROR(local_error) // Macro defined outside this chunk; presumably reports a failure - TODO confirm.
+    error |= local_error;
+    local_error = sub_group_reduce<cl_uint, work_group_op::min>(device, context, queue, n_elems); // min-reduction over cl_uint
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_reduce<cl_long, work_group_op::min>(device, context, queue, n_elems); // min-reduction over cl_long
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_reduce<cl_ulong, work_group_op::min>(device, context, queue, n_elems); // min-reduction over cl_ulong
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    if(error != CL_SUCCESS) // Any accumulated failure is collapsed into -1.
+        return -1;
+    return CL_SUCCESS;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // NOTE(review): the line that names this test case is not visible in this chunk - confirm against the original file.
+    int error = CL_SUCCESS; // OR-accumulated status of all runs below.
+    int local_error = CL_SUCCESS; // Status of the most recent run.
+    local_error = sub_group_reduce<cl_int, work_group_op::max>(device, context, queue, n_elems); // max-reduction over cl_int
+    CHECK_ERROR(local_error) // Macro defined outside this chunk; presumably reports a failure - TODO confirm.
+    error |= local_error;
+    local_error = sub_group_reduce<cl_uint, work_group_op::max>(device, context, queue, n_elems); // max-reduction over cl_uint
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_reduce<cl_long, work_group_op::max>(device, context, queue, n_elems); // max-reduction over cl_long
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_reduce<cl_ulong, work_group_op::max>(device, context, queue, n_elems); // max-reduction over cl_ulong
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    if(error != CL_SUCCESS) // Any accumulated failure is collapsed into -1.
+        return -1;
+    return CL_SUCCESS;
diff --git a/test_conformance/clcpp/subgroups/test_sg_scan_exclusive.hpp b/test_conformance/clcpp/subgroups/test_sg_scan_exclusive.hpp
new file mode 100644
index 0000000..7bb2b18
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/test_sg_scan_exclusive.hpp
@@ -0,0 +1,325 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <algorithm>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of sub-group functions
+#include "common.hpp"
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_sg_scan_exclusive_kernel_code()
+    // Assembles the OpenCL C++ source of the test_sg_scan_exclusive kernel:
+    // each work-item passes input[tid] to sub_group_scan_exclusive<op> and
+    // stores the returned value in output[tid].
+    const std::string elem_type = type_name<CL_INT_TYPE>();
+    const std::string op_name = to_string(op);
+    std::string source;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "#include <opencl_work_group>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_sg_scan_exclusive(global_ptr<" + elem_type + "[]> input, ";
+    source += "global_ptr<" + elem_type + "[]> output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "    " + elem_type + " result = sub_group_scan_exclusive<work_group_op::" + op_name + ">(input[tid]);\n";
+    source += "    output[tid] = result;\n";
+    source += "}\n";
+    return source;
+template <class CL_INT_TYPE>
+int verify_sg_scan_exclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+    // Host-side check of sub_group_scan_exclusive<add> results.
+    // `in` is walked in work-groups of wg_size elements, each split into
+    // sub-groups of at most sg_size elements; every element of `out` must
+    // equal the sum of the inputs that precede it in its sub-group (0 for
+    // the first element).
+    // Returns CL_SUCCESS when all elements match, -1 at the first mismatch.
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // Elements of this work-group that are actually present in `in`.
+        const size_t wg_items = std::min<size_t>(wg_size, in.size() - i);
+        for (size_t j = 0; j < wg_items; j += sg_size)
+        {
+            // Clamp to the elements that remain, so a trailing partial
+            // work-group or sub-group is never read past the end of `in`.
+            const size_t sg_items = std::min<size_t>(sg_size, wg_items - j);
+            CL_INT_TYPE sum = 0;
+            // Check if all work-items in sub-group stored correct value
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                if (sum != out[i + j + k])
+                {
+                    // Report the element type under test, not a fixed cl_uint.
+                    log_info(
+                        "sub_group_scan_exclusive_add %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j,
+                        static_cast<size_t>(sum),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+                // Exclusive scan: fold the current input in only after comparing.
+                sum += in[i + j + k];
+            }
+        }
+    }
+    return CL_SUCCESS;
+template <class CL_INT_TYPE>
+int verify_sg_scan_exclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+    // Host-side check of sub_group_scan_exclusive<min> results.
+    // `in` is walked in work-groups of wg_size elements, each split into
+    // sub-groups of at most sg_size elements; every element of `out` must
+    // equal the smallest of the inputs that precede it in its sub-group
+    // (the type's maximum value for the first element).
+    // Returns CL_SUCCESS when all elements match, -1 at the first mismatch.
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // Elements of this work-group that are actually present in `in`.
+        const size_t wg_items = std::min<size_t>(wg_size, in.size() - i);
+        for (size_t j = 0; j < wg_items; j += sg_size)
+        {
+            // Clamp to the elements that remain, so a trailing partial
+            // work-group or sub-group is never read past the end of `in`.
+            const size_t sg_items = std::min<size_t>(sg_size, wg_items - j);
+            CL_INT_TYPE expected = (std::numeric_limits<CL_INT_TYPE>::max)();
+            // Check if all work-items in sub-group stored correct value
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                if (expected != out[i + j + k])
+                {
+                    // Report the element type under test, not a fixed cl_uint.
+                    log_info(
+                        "sub_group_scan_exclusive_min %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j,
+                        static_cast<size_t>(expected),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+                // Exclusive scan: fold the current input in only after comparing.
+                expected = std::min<CL_INT_TYPE>(expected, in[i + j + k]);
+            }
+        }
+    }
+    return CL_SUCCESS;
+template <class CL_INT_TYPE>
+int verify_sg_scan_exclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+    // Host-side check of sub_group_scan_exclusive<max> results.
+    // `in` is walked in work-groups of wg_size elements, each split into
+    // sub-groups of at most sg_size elements; every element of `out` must
+    // equal the largest of the inputs that precede it in its sub-group
+    // (the type's minimum value for the first element).
+    // Returns CL_SUCCESS when all elements match, -1 at the first mismatch.
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // Elements of this work-group that are actually present in `in`.
+        const size_t wg_items = std::min<size_t>(wg_size, in.size() - i);
+        for (size_t j = 0; j < wg_items; j += sg_size)
+        {
+            // Clamp to the elements that remain, so a trailing partial
+            // work-group or sub-group is never read past the end of `in`.
+            const size_t sg_items = std::min<size_t>(sg_size, wg_items - j);
+            CL_INT_TYPE expected = (std::numeric_limits<CL_INT_TYPE>::min)();
+            // Check if all work-items in sub-group stored correct value
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                if (expected != out[i + j + k])
+                {
+                    // Report the element type under test, not a fixed cl_uint.
+                    log_info(
+                        "sub_group_scan_exclusive_max %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j,
+                        static_cast<size_t>(expected),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+                // Exclusive scan: fold the current input in only after comparing.
+                expected = std::max<CL_INT_TYPE>(expected, in[i + j + k]);
+            }
+        }
+    }
+    return CL_SUCCESS;
+template <class CL_INT_TYPE, work_group_op op>
+int verify_sg_scan_exclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+    // Forwards to the host-side checker that matches the compile-time
+    // operator `op`; any operator other than add/min/max yields -1.
+    if (op == work_group_op::add)
+    {
+        return verify_sg_scan_exclusive_add(in, out, wg_size, sg_size);
+    }
+    if (op == work_group_op::min)
+    {
+        return verify_sg_scan_exclusive_min(in, out, wg_size, sg_size);
+    }
+    if (op == work_group_op::max)
+    {
+        return verify_sg_scan_exclusive_max(in, out, wg_size, sg_size);
+    }
+    return -1;
+template <class CL_INT_TYPE, work_group_op op>
+int sub_group_scan_exclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) // Builds the test_sg_scan_exclusive kernel for CL_INT_TYPE/op, runs it over at least `count` work-items and verifies the output on the host.
+    // don't run test for unsupported types
+    if(!type_supported<CL_INT_TYPE>(device))
+    {
+        return CL_SUCCESS;
+    }
+    cl_mem buffers[2]; // [0] = kernel input, [1] = kernel output (see clSetKernelArg calls below)
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t sg_max_size;
+    size_t work_size[1];
+    int err;
+    std::string code_str = generate_sg_scan_exclusive_kernel_code<CL_INT_TYPE, op>();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_exclusive"); // NOTE(review): the preprocessor condition opening this branch is not visible in this chunk - confirm against the original file.
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
+    return CL_SUCCESS;
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_exclusive"); // NOTE(review): as shown, this result is overwritten by the next call without being checked - confirm.
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); // wg_size is later used as the local work size.
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+    size_t param_value_size = 0;
+    err = clGetKernelSubGroupInfo( // Query the maximum sub-group size for an ND-range whose local size is wg_size.
+        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+        sizeof(size_t), static_cast<void*>(&wg_size),
+        sizeof(size_t), static_cast<void*>(&sg_max_size),
+        &param_value_size
+    );
+    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
+    // Verify size of returned param
+    if(param_value_size != sizeof(size_t))
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
+            sizeof(size_t),
+            param_value_size
+        )
+    }
+    // Calculate global work size
+    size_t flat_work_size;
+    size_t wg_number = static_cast<size_t>( // Number of work-groups: count / wg_size rounded up.
+        std::ceil(static_cast<double>(count) / wg_size)
+    );
+    flat_work_size = wg_number * wg_size; // Global size is a whole number of work-groups.
+    work_size[0] = flat_work_size;
+    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    err = clEnqueueWriteBuffer( // Blocking write (CL_TRUE) into the input buffer.
+        queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the cast operand is missing here; presumably the host storage of `input` - confirm.
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); // 1-D launch: global size work_size[0], local size wg_size.
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+    err = clEnqueueReadBuffer( // Blocking read (CL_TRUE) of the output buffer.
+        queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the cast operand is missing here; presumably the host storage of `output` - confirm.
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    if (verify_sg_scan_exclusive<CL_INT_TYPE, op>(input, output, wg_size, sg_max_size) != CL_SUCCESS) // Host check uses sg_max_size as the sub-group size.
+    {
+        RETURN_ON_ERROR_MSG(-1, "sub_group_scan_exclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    }
+    log_info("sub_group_scan_exclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    clReleaseMemObject(buffers[0]); // NOTE(review): if the RETURN_ON_* macros above return early (as their names suggest - confirm), these releases are skipped on failure paths.
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // NOTE(review): the line that names this test case is not visible in this chunk - confirm against the original file.
+    int error = CL_SUCCESS; // OR-accumulated status of all runs below.
+    int local_error = CL_SUCCESS; // Status of the most recent run.
+    local_error = sub_group_scan_exclusive<cl_int, work_group_op::add>(device, context, queue, n_elems); // exclusive add-scan over cl_int
+    CHECK_ERROR(local_error) // Macro defined outside this chunk; presumably reports a failure - TODO confirm.
+    error |= local_error;
+    local_error = sub_group_scan_exclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems); // exclusive add-scan over cl_uint
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_scan_exclusive<cl_long, work_group_op::add>(device, context, queue, n_elems); // exclusive add-scan over cl_long
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_scan_exclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems); // exclusive add-scan over cl_ulong
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    if(error != CL_SUCCESS) // Any accumulated failure is collapsed into -1.
+        return -1;
+    return CL_SUCCESS;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+    local_error = sub_group_scan_exclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    local_error = sub_group_scan_exclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    local_error = sub_group_scan_exclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    local_error = sub_group_scan_exclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    if(error != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // NOTE(review): the line that names this test case is not visible in this chunk - confirm against the original file.
+    int error = CL_SUCCESS; // OR-accumulated status of all runs below.
+    int local_error = CL_SUCCESS; // Status of the most recent run.
+    local_error = sub_group_scan_exclusive<cl_int, work_group_op::max>(device, context, queue, n_elems); // exclusive max-scan over cl_int
+    CHECK_ERROR(local_error) // Macro defined outside this chunk; presumably reports a failure - TODO confirm.
+    error |= local_error;
+    local_error = sub_group_scan_exclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems); // exclusive max-scan over cl_uint
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_scan_exclusive<cl_long, work_group_op::max>(device, context, queue, n_elems); // exclusive max-scan over cl_long
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_scan_exclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems); // exclusive max-scan over cl_ulong
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    if(error != CL_SUCCESS) // Any accumulated failure is collapsed into -1.
+        return -1;
+    return CL_SUCCESS;
diff --git a/test_conformance/clcpp/subgroups/test_sg_scan_inclusive.hpp b/test_conformance/clcpp/subgroups/test_sg_scan_inclusive.hpp
new file mode 100644
index 0000000..803daa0
--- /dev/null
+++ b/test_conformance/clcpp/subgroups/test_sg_scan_inclusive.hpp
@@ -0,0 +1,332 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <algorithm>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of sub-group functions
+#include "common.hpp"
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_sg_scan_inclusive_kernel_code()
+    // Assembles the OpenCL C++ source of the test_sg_scan_inclusive kernel:
+    // each work-item passes input[tid] to sub_group_scan_inclusive<op> and
+    // stores the returned value in output[tid].
+    const std::string elem_type = type_name<CL_INT_TYPE>();
+    const std::string op_name = to_string(op);
+    std::string source;
+    source += "#include <opencl_memory>\n";
+    source += "#include <opencl_work_item>\n";
+    source += "#include <opencl_work_group>\n";
+    source += "using namespace cl;\n";
+    source += "__kernel void test_sg_scan_inclusive(global_ptr<" + elem_type + "[]> input, ";
+    source += "global_ptr<" + elem_type + "[]> output)\n";
+    source += "{\n";
+    source += "    ulong tid = get_global_id(0);\n";
+    source += "    " + elem_type + " result = sub_group_scan_inclusive<work_group_op::" + op_name + ">(input[tid]);\n";
+    source += "    output[tid] = result;\n";
+    source += "}\n";
+    return source;
+template <class CL_INT_TYPE>
+int verify_sg_scan_inclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+    // Host-side check of sub_group_scan_inclusive<add> results.
+    // `in` is walked in work-groups of wg_size elements, each split into
+    // sub-groups of at most sg_size elements; every element of `out` must
+    // equal the sum of the inputs of its sub-group up to and including its
+    // own position.
+    // Returns CL_SUCCESS when all elements match, -1 at the first mismatch.
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // Elements of this work-group that are actually present in `in`.
+        const size_t wg_items = std::min<size_t>(wg_size, in.size() - i);
+        for (size_t j = 0; j < wg_items; j += sg_size)
+        {
+            // Clamp to the elements that remain, so a trailing partial
+            // work-group or sub-group is never read past the end of `in`.
+            const size_t sg_items = std::min<size_t>(sg_size, wg_items - j);
+            CL_INT_TYPE sum = 0;
+            // Check if all work-items in sub-group stored correct value
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                // Inclusive scan: fold the current input in before comparing.
+                sum += in[i + j + k];
+                if (sum != out[i + j + k])
+                {
+                    // Name the inclusive scan and the element type under test.
+                    log_info(
+                        "sub_group_scan_inclusive_add %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j,
+                        static_cast<size_t>(sum),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+template <class CL_INT_TYPE>
+int verify_sg_scan_inclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+    // Host-side check of sub_group_scan_inclusive<min> results.
+    // `in` is walked in work-groups of wg_size elements, each split into
+    // sub-groups of at most sg_size elements; every element of `out` must
+    // equal the smallest input of its sub-group up to and including its own
+    // position.
+    // Returns CL_SUCCESS when all elements match, -1 at the first mismatch.
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // Elements of this work-group that are actually present in `in`.
+        const size_t wg_items = std::min<size_t>(wg_size, in.size() - i);
+        for (size_t j = 0; j < wg_items; j += sg_size)
+        {
+            // Clamp to the elements that remain, so a trailing partial
+            // work-group or sub-group is never read past the end of `in`.
+            const size_t sg_items = std::min<size_t>(sg_size, wg_items - j);
+            CL_INT_TYPE expected = (std::numeric_limits<CL_INT_TYPE>::max)();
+            // Check if all work-items in sub-group stored correct value
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                // Inclusive scan: fold the current input in before comparing.
+                expected = std::min<CL_INT_TYPE>(expected, in[i + j + k]);
+                if (expected != out[i + j + k])
+                {
+                    // Name the inclusive scan and the element type under test.
+                    log_info(
+                        "sub_group_scan_inclusive_min %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j,
+                        static_cast<size_t>(expected),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+template <class CL_INT_TYPE>
+int verify_sg_scan_inclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+    // Host-side check of sub_group_scan_inclusive<max> results.
+    // `in` is walked in work-groups of wg_size elements, each split into
+    // sub-groups of at most sg_size elements; every element of `out` must
+    // equal the largest input of its sub-group up to and including its own
+    // position.
+    // Returns CL_SUCCESS when all elements match, -1 at the first mismatch.
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // Elements of this work-group that are actually present in `in`.
+        const size_t wg_items = std::min<size_t>(wg_size, in.size() - i);
+        for (size_t j = 0; j < wg_items; j += sg_size)
+        {
+            // Clamp to the elements that remain, so a trailing partial
+            // work-group or sub-group is never read past the end of `in`.
+            const size_t sg_items = std::min<size_t>(sg_size, wg_items - j);
+            CL_INT_TYPE expected = (std::numeric_limits<CL_INT_TYPE>::min)();
+            // Check if all work-items in sub-group stored correct value
+            for (size_t k = 0; k < sg_items; k++)
+            {
+                // Inclusive scan: fold the current input in before comparing.
+                expected = std::max<CL_INT_TYPE>(expected, in[i + j + k]);
+                if (expected != out[i + j + k])
+                {
+                    // Name the inclusive scan and the element type under test.
+                    log_info(
+                        "sub_group_scan_inclusive_max %s: Error at %lu: expected = %lu, got = %lu\n",
+                        type_name<CL_INT_TYPE>().c_str(),
+                        i + j,
+                        static_cast<size_t>(expected),
+                        static_cast<size_t>(out[i + j + k]));
+                    return -1;
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+template <class CL_INT_TYPE, work_group_op op>
+int verify_sg_scan_inclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size, size_t sg_size)
+    // Forwards to the host-side checker that matches the compile-time
+    // operator `op`; any operator other than add/min/max yields -1.
+    if (op == work_group_op::add)
+    {
+        return verify_sg_scan_inclusive_add(in, out, wg_size, sg_size);
+    }
+    if (op == work_group_op::min)
+    {
+        return verify_sg_scan_inclusive_min(in, out, wg_size, sg_size);
+    }
+    if (op == work_group_op::max)
+    {
+        return verify_sg_scan_inclusive_max(in, out, wg_size, sg_size);
+    }
+    return -1;
+// Builds the inclusive-scan kernel for CL_INT_TYPE/op, runs it over `count`
+// elements rounded up to a whole number of work-groups, reads the result back
+// and checks it with verify_sg_scan_inclusive.
+// Returns CL_SUCCESS without running anything when type_supported<CL_INT_TYPE>
+// reports false for the device.
+// NOTE(review): every early return below (the RETURN_ON_* macros presumably
+// return on failure — confirm) leaves buffers/kernel/program unreleased; the
+// release calls are only reached on the success path.
+template <class CL_INT_TYPE, work_group_op op>
+int sub_group_scan_inclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+    // don't run test for unsupported types
+    if(!type_supported<CL_INT_TYPE>(device))
+    {
+        return CL_SUCCESS;
+    }
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t sg_max_size;
+    size_t work_size[1];
+    int err;
+    std::string code_str = generate_sg_scan_inclusive_kernel_code<CL_INT_TYPE, op>();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// NOTE(review): the three build variants below were selected by preprocessor
+// conditionals; only the `#elif` line survives in this text, so the opening
+// `#if`, the `#else` and the `#endif` have to be restored before this compiles
+// as intended.
+// Only OpenCL C++ to SPIR-V compilation
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_inclusive");
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
+    return CL_SUCCESS;
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_inclusive");
+    // Largest work-group size this kernel can be launched with on `device`.
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+    // Ask for the maximum sub-group size given a 1-D local size of wg_size
+    // (wg_size is the query's input value, sg_max_size its output).
+    size_t param_value_size = 0;
+    err = clGetKernelSubGroupInfo(
+        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+        sizeof(size_t), static_cast<void*>(&wg_size),
+        sizeof(size_t), static_cast<void*>(&sg_max_size),
+        &param_value_size
+    );
+    RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
+    // Verify size of returned param
+    // NOTE(review): both arguments are size_t; "%lu" is only correct where
+    // size_t is unsigned long — "%zu" would be portable.
+    if(param_value_size != sizeof(size_t))
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n",
+            sizeof(size_t),
+            param_value_size
+        )
+    }
+    // Calculate global work size
+    // (count rounded up to the next multiple of wg_size)
+    size_t flat_work_size;
+    size_t wg_number = static_cast<size_t>(
+        std::ceil(static_cast<double>(count) / wg_size)
+    );
+    flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    // buffers[0] holds the kernel input, buffers[1] receives the scan result.
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    // Blocking upload of the host input.
+    // NOTE(review): the host-pointer expression inside static_cast<void *>(...)
+    // is missing from this text — presumably the input vector's storage.
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+    // Blocking download of the result.
+    // NOTE(review): host-pointer expression missing here as well — presumably
+    // the output vector's storage.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    if (verify_sg_scan_inclusive<CL_INT_TYPE, op>(input, output, wg_size, sg_max_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "sub_group_scan_inclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    }
+    log_info("sub_group_scan_inclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    // err still holds the status of the read-back above.
+    return err;
+// Runs sub_group_scan_inclusive with work_group_op::add for cl_int, cl_uint,
+// cl_long and cl_ulong. Individual results are OR-ed together; the body
+// returns -1 if the combined value is not CL_SUCCESS, CL_SUCCESS otherwise.
+// NOTE(review): the line naming this test case is missing from this text.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+    local_error = sub_group_scan_inclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_scan_inclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_scan_inclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_scan_inclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    // Collapse any accumulated failure bits into the single value -1.
+    if(error != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
+// Runs sub_group_scan_inclusive with work_group_op::min for cl_int, cl_uint,
+// cl_long and cl_ulong. Individual results are OR-ed together; the body
+// returns -1 if the combined value is not CL_SUCCESS, CL_SUCCESS otherwise.
+// NOTE(review): the line naming this test case is missing from this text.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+    local_error = sub_group_scan_inclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_scan_inclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_scan_inclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_scan_inclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    // Collapse any accumulated failure bits into the single value -1.
+    if(error != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
+// Runs sub_group_scan_inclusive with work_group_op::max for cl_int, cl_uint,
+// cl_long and cl_ulong. Individual results are OR-ed together; the body
+// returns -1 if the combined value is not CL_SUCCESS, CL_SUCCESS otherwise.
+// NOTE(review): the line naming this test case is missing from this text.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+    local_error = sub_group_scan_inclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_scan_inclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_scan_inclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = sub_group_scan_inclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    // Collapse any accumulated failure bits into the single value -1.
+    if(error != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
diff --git a/test_conformance/clcpp/synchronization/CMakeLists.txt b/test_conformance/clcpp/synchronization/CMakeLists.txt
new file mode 100644
index 0000000..70d3637
--- /dev/null
+++ b/test_conformance/clcpp/synchronization/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/synchronization/main.cpp b/test_conformance/clcpp/synchronization/main.cpp
new file mode 100644
index 0000000..b337238
--- /dev/null
+++ b/test_conformance/clcpp/synchronization/main.cpp
@@ -0,0 +1,27 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "test_work_group_barrier.hpp"
+#include "test_sub_group_barrier.hpp"
+#include "named_barrier/test_spec_example.hpp"
+#include "named_barrier/test_named_barrier.hpp"
+// Entry point: passes the tests registered in the global autotest suite to
+// the shared test harness and returns the harness result.
+// NOTE(review): the fourth argument of runTestHarness is missing in this text
+// (",,") — presumably the pointer to the test table; restore before building.
+int main(int argc, const char *argv[])
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(argc, argv, tests.size(),, false, false, 0);
diff --git a/test_conformance/clcpp/synchronization/named_barrier/common.hpp b/test_conformance/clcpp/synchronization/named_barrier/common.hpp
new file mode 100644
index 0000000..da34dce
--- /dev/null
+++ b/test_conformance/clcpp/synchronization/named_barrier/common.hpp
@@ -0,0 +1,171 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+// Common for all OpenCL C++ tests
+#include "../../common.hpp"
+#include "../../funcs_test_utils.hpp"
+    last_error = run_work_group_named_barrier_barrier_test(  \
+        device, context, queue, num_elements, TEST_CLASS \
+    );  \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+namespace named_barrier {
+struct work_group_named_barrier_test_base : public detail::base_func_type<cl_uint>
+    // Returns test name
+    virtual std::string str() = 0;
+    // Returns OpenCL program source
+    // It's assumed that this program has only one kernel.
+    virtual std::string generate_program() = 0;
+    // Return value that is expected to be in output_buffer[i]
+    virtual cl_uint operator()(size_t i, size_t work_group_size, size_t mas_sub_group_size) = 0;
+    // Kernel execution
+    // This covers typical case: kernel is executed once, kernel
+    // has only one argument which is output buffer
+    virtual cl_int execute(const cl_kernel kernel,
+                           const cl_mem output_buffer,
+                           const cl_command_queue& queue,
+                           const size_t work_size,
+                           const size_t work_group_size)
+    {
+        // Default launch: the output buffer is the kernel's only argument.
+        cl_int status = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(status, "clSetKernelArg")
+
+        // Single 1-D enqueue, no global offset, no wait list, no event.
+        status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &work_size, &work_group_size, 0, NULL, NULL);
+        RETURN_ON_CL_ERROR(status, "clEnqueueNDRangeKernel")
+
+        return status;
+    }
+    // Calculates maximal work-group size (one dim)
+    virtual size_t get_max_local_size(const cl_kernel kernel,
+                                      const cl_device_id device,
+                                      const size_t work_group_size, // default work-group size
+                                      cl_int& error)
+    {
+        // Query the largest work-group size the device accepts for this kernel.
+        size_t kernel_limit;
+        error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(kernel_limit), &kernel_limit, NULL);
+        RETURN_ON_ERROR(error)
+
+        // Use the requested default unless the kernel limit is smaller.
+        if(kernel_limit < work_group_size)
+        {
+            return kernel_limit;
+        }
+        return work_group_size;
+    }
+    // if work-groups should be uniform
+    virtual bool enforce_uniform()
+    {
+        // Default: the runner keeps the global size as given. When this
+        // returns true the runner rounds it up to whole work-groups.
+        const bool require_uniform = false;
+        return require_uniform;
+    }
+// Generic runner for one named-barrier test object: builds the program the
+// test generates, picks a work-group size via test.get_max_local_size, lets
+// the test launch its kernel, reads the cl_uint output buffer back and
+// compares every element with the value the test's operator() predicts.
+// NOTE(review): the RETURN_ON_* exits below (presumably returning on failure —
+// confirm) skip the release calls at the end, so buffer/kernel/program are
+// only released on the success path.
+template <class work_group_named_barrier_test>
+int run_work_group_named_barrier_barrier_test(cl_device_id device, cl_context context, cl_command_queue queue,
+                                              size_t count, work_group_named_barrier_test test)
+    cl_mem buffers[1];
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_group_size;
+    size_t work_size[1];
+    cl_int err;
+    std::string code_str = test.generate_program();
+    std::string kernel_name = test.get_kernel_name();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// NOTE(review): the three build variants below were selected by preprocessor
+// conditionals; only the `#elif` line survives in this text, so the opening
+// `#if`, the `#else` and the `#endif` have to be restored.
+// Only OpenCL C++ to SPIR-V compilation
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    // Ask the test for the largest usable work-group size, starting from 256.
+    work_group_size = test.get_max_local_size(kernel, device, 256, err);
+    RETURN_ON_ERROR(err);
+    if(work_group_size == 0)
+    {
+        log_info("SKIPPED: Can't produce local size with enough sub-groups. Skipping tests.\n");
+        return CL_SUCCESS;
+    }
+    work_size[0] = count;
+    // uniform work-group
+    // (round the global size up to a multiple of work_group_size)
+    if(test.enforce_uniform())
+    {
+        size_t wg_number = static_cast<size_t>(
+            std::ceil(static_cast<double>(work_size[0]) / work_group_size)
+        );
+        work_size[0] = wg_number * work_group_size;
+    }
+    // host output vector
+    std::vector<cl_uint> output = generate_output<cl_uint>(work_size[0], 9999);
+    // device output buffer
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+    // Execute test kernels
+    // NOTE(review): the status returned by test.execute is overwritten by the
+    // read-back below without being checked, so a failed launch is not
+    // reported as such.
+    err = test.execute(kernel, buffers[0], queue, work_size[0], work_group_size);
+    // NOTE(review): the host-pointer expression inside static_cast<void *>(...)
+    // is missing from this text — presumably the output vector's storage.
+    err = clEnqueueReadBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer")
+    // Check output values
+    for(size_t i = 0; i < output.size(); i++)
+    {
+        // NOTE(review): the third parameter of operator() is named as a
+        // maximum sub-group size, but the element index `i` is passed here.
+        cl_uint v = test(i, work_group_size, i);
+        if(!(are_equal(v, output[i], ::detail::make_value<cl_uint>(0), test)))
+        {
+            RETURN_ON_ERROR_MSG(-1,
+                "test_%s(%s) failed. Expected: %s, got: %s", test.str().c_str(), type_name<cl_uint>().c_str(),
+                format_value(v).c_str(), format_value(output[i]).c_str()
+            );
+        }
+    }
+    log_info("test_%s(%s) passed\n", test.str().c_str(), type_name<cl_uint>().c_str());
+    clReleaseMemObject(buffers[0]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    // err still holds the status of the read-back above.
+    return err;
+} // namespace named_barrier
diff --git a/test_conformance/clcpp/synchronization/named_barrier/test_named_barrier.hpp b/test_conformance/clcpp/synchronization/named_barrier/test_named_barrier.hpp
new file mode 100644
index 0000000..a4f9a04
--- /dev/null
+++ b/test_conformance/clcpp/synchronization/named_barrier/test_named_barrier.hpp
@@ -0,0 +1,490 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "common.hpp"
+namespace named_barrier {
+struct local_fence_named_barrier_test : public work_group_named_barrier_test_base
+    std::string str()
+    {
+        // Test name; the runner prints it in its pass/fail messages.
+        const std::string name("local_fence");
+        return name;
+    }
+    // Return value that is expected to be in output_buffer[i]
+    cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size)
+    {
+        // Element i is expected to hold i itself; the work-group and
+        // sub-group sizes play no part in the expectation.
+        const cl_uint expected = static_cast<cl_uint>(i);
+        return expected;
+    }
+    // At the end every work-item writes its global id to output[work-item-global-id].
+    std::string generate_program()
+    {
+        // Returns the kernel source as a string. In the OpenCL C++ variant each
+        // participating work-item stores the global id of a *different*
+        // work-item into that work-item's local-memory slot, waits on a named
+        // barrier with a local fence, then reads its own slot back. With more
+        // than two sub-groups only sub-groups 0 and 1 use the barrier; the
+        // remaining work-items write their global id directly.
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            // OpenCL C stand-in: no barrier, just writes the global id.
+            return
+                "__kernel void " + this->get_kernel_name() + "(global uint *output, "
+                                                              "local uint * lmem)\n"
+                "{\n"
+                "  size_t gid = get_global_id(0);\n"
+                "  output[gid] = gid;\n"
+                "}\n";
+        #else
+            return
+                "#define cl_khr_subgroup_named_barrier\n"
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_synchronization>\n"
+                "using namespace cl;\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output, "
+                                                              "local_ptr<uint[]> lmem)\n"
+                "{\n\n"
+                "  local<work_group_named_barrier> a(1);\n"
+                "  local<work_group_named_barrier> b(2);\n"
+                "  size_t gid = get_global_id(0);\n"
+                "  size_t lid = get_local_id(0);\n"
+                "  size_t value;\n"
+                "  if(get_num_sub_groups() == 1)\n"
+                "  {\n"
+                "    size_t other_lid = (lid + 1) % get_enqueued_local_size(0);\n"
+                "    size_t other_gid = (gid - lid) + other_lid;\n"
+                "    lmem[other_lid] = other_gid;\n"
+                "    a.wait(mem_fence::local);\n"
+                "    value = lmem[lid];" // lmem[lid] should be equal to gid
+                "  }\n"
+                "  else if(get_num_sub_groups() == 2)\n"
+                "  {\n"
+                "    size_t other_lid = (lid + get_max_sub_group_size()) % get_enqueued_local_size(0);\n"
+                "    size_t other_gid = (gid - lid) + other_lid;\n"
+                "    lmem[other_lid] = other_gid;\n"
+                "    b.wait(mem_fence::local);\n"
+                "    value = lmem[lid];" // lmem[lid] should be equal to gid
+                "  }\n"
+                "  else if(get_num_sub_groups() > 2)\n"
+                "  {\n"
+                "    if(get_sub_group_id() < 2)\n"
+                "    {\n"
+                "      const size_t two_first_subgroups = 2 * get_max_sub_group_size();"
+                       // local and global id of some work-item outside of work-item subgroup,
+                       // but within subgroups 0 and 1.
+                "      size_t other_lid = (lid + get_max_sub_group_size()) % two_first_subgroups;\n"
+                "      size_t other_gid = (gid - lid) + other_lid;\n"
+                "      lmem[other_lid] = other_gid;\n"
+                "      b.wait(mem_fence::local);\n" // subgroup 0 and 1 are sync (local)
+                "      value = lmem[lid];" // lmem[lid] should be equal to gid
+                "    }\n"
+                "    else\n"
+                "    {\n"
+                "      value = gid;\n"
+                "    }\n"
+                "  }\n"
+                "  output[gid] = value;\n"
+                "}\n";
+        #endif
+    }
+    size_t get_max_local_size(const cl_kernel kernel,
+                              const cl_device_id device,
+                              const size_t work_group_size, // default work-group size
+                              cl_int& error)
+    {
+        // The local-memory argument (index 1) is sized at one cl_uint per
+        // work-item and CL_KERNEL_WORK_GROUP_SIZE is re-queried after each
+        // resize. Start from the default size and halve the candidate until
+        // the kernel reports it can run at that size, or the candidate is 1.
+        size_t candidate = work_group_size;
+        while(candidate > 1)
+        {
+            const size_t local_bytes = candidate * sizeof(cl_uint);
+            error = clSetKernelArg(kernel, 1, local_bytes, NULL);
+            RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+            size_t kernel_limit;
+            error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(kernel_limit), &kernel_limit, NULL);
+            RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+
+            if(candidate <= kernel_limit)
+            {
+                break;
+            }
+            candidate /= 2;
+        }
+        return candidate;
+    }
+    cl_int execute(const cl_kernel kernel,
+                   const cl_mem output_buffer,
+                   const cl_command_queue queue,
+                   const size_t work_size,
+                   const size_t work_group_size)
+    {
+        // Binds the output buffer (arg 0) and one cl_uint of local memory per
+        // work-item (arg 1), launches the kernel once over a 1-D range and
+        // blocks until the queue has finished.
+        // Returns the status of the last OpenCL call made.
+        //
+        // The queue's context used to be queried here but was never used, so
+        // that dead query has been removed.
+        // NOTE(review): the base class declares `queue` as a const reference;
+        // this by-value signature hides rather than overrides it — confirm
+        // that is intended.
+        cl_int err;
+        err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+        // NULL arg_value with a non-zero size requests local memory.
+        err |= clSetKernelArg(kernel, 1, work_group_size * sizeof(cl_uint), NULL);
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg")
+        err = clEnqueueNDRangeKernel(
+            queue, kernel, 1,
+            NULL, &work_size, &work_group_size,
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel")
+        err = clFinish(queue);
+        return err;
+    }
+struct global_fence_named_barrier_test : public work_group_named_barrier_test_base
+    std::string str()
+    {
+        // Test name; the runner prints it in its pass/fail messages.
+        const std::string name("global_fence");
+        return name;
+    }
+    // Return value that is expected to be in output_buffer[i]
+    cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size)
+    {
+        // Element i is expected to hold i modulo the work-group size; the
+        // sub-group size plays no part in the expectation.
+        const size_t position_in_group = i % work_group_size;
+        return static_cast<cl_uint>(position_in_group);
+    }
+    // At the end every work-item writes its local id to output[work-item-global-id].
+    std::string generate_program()
+    {
+        // Returns the kernel source as a string. In the OpenCL C++ variant each
+        // participating work-item stores (local id + 1) into the global `temp`
+        // slot of a *different* work-item, waits on a named barrier with a
+        // global fence, decrements another slot, waits again with sub-group
+        // memory scope, and finally reads its own slot back. With more than
+        // two sub-groups only sub-groups 0 and 1 use the barrier; the
+        // remaining work-items write their local id directly.
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            // OpenCL C stand-in: no barrier, just writes the local id.
+            return
+                "__kernel void " + this->get_kernel_name() + "(global uint * output, "
+                                                              "global uint * temp)\n"
+                "{\n"
+                "size_t gid = get_global_id(0);\n"
+                "output[gid] = get_local_id(0);\n"
+                "}\n";
+        #else
+            return
+                "#define cl_khr_subgroup_named_barrier\n"
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_synchronization>\n"
+                "using namespace cl;\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output, "
+                                                              "global_ptr<uint[]> temp)\n"
+                "{\n\n"
+                "  local<work_group_named_barrier> a(1);\n"
+                "  local<work_group_named_barrier> b(2);\n"
+                "  size_t gid = get_global_id(0);\n"
+                "  size_t lid = get_local_id(0);\n"
+                "  size_t value;\n"
+                "  if(get_num_sub_groups() == 1)\n"
+                "  {\n"
+                "    size_t other_lid = (lid + 1) % get_enqueued_local_size(0);\n"
+                "    size_t other_gid = (gid - lid) + other_lid;\n"
+                "    temp[other_gid] = other_lid + 1;\n"
+                "    a.wait(mem_fence::global);\n"
+                "    size_t other_lid_same_subgroup = (lid + 2) % get_sub_group_size();\n"
+                "    size_t other_gid_same_subgroup = (gid - lid) + other_lid_same_subgroup;\n"
+                "    temp[other_gid_same_subgroup] = temp[other_gid_same_subgroup] - 1;\n"
+                "    a.wait(mem_fence::global, memory_scope_sub_group);\n"
+                "    value = temp[gid];" // temp[gid] should be equal to lid
+                "  }\n"
+                "  else if(get_num_sub_groups() == 2)\n"
+                "  {\n"
+                "    size_t other_lid = (lid + get_max_sub_group_size()) % get_enqueued_local_size(0);\n"
+                "    size_t other_gid = (gid - lid) + other_lid;\n"
+                "    temp[other_gid] = other_lid + 1;\n"
+                "    b.wait(mem_fence::global);\n" // both subgroups wait, both are sync
+                "    size_t other_lid_same_subgroup = "
+                       "((lid + 1) % get_sub_group_size()) + (get_sub_group_id() * get_sub_group_size());\n"
+                "    size_t other_gid_same_subgroup = (gid - lid) + other_lid_same_subgroup;\n"
+                "    temp[other_gid_same_subgroup] = temp[other_gid_same_subgroup] - 1;\n"
+                "    b.wait(mem_fence::global, memory_scope_sub_group);\n"  // both subgroups wait, sync only within subgroup
+                "    value = temp[gid];" // temp[gid] should be equal to lid
+                "  }\n"
+                "  else if(get_num_sub_groups() > 2)\n"
+                "  {\n"
+                "    if(get_sub_group_id() < 2)\n"
+                "    {\n"
+                "      const size_t two_first_subgroups = 2 * get_max_sub_group_size();"
+                       // local and global id of some work-item outside of work-item subgroup,
+                       // but within subgroups 0 and 1.
+                "      size_t other_lid = (lid + get_max_sub_group_size()) % two_first_subgroups;\n"
+                "      size_t other_gid = (gid - lid) + other_lid;\n"
+                "      temp[other_gid] = other_lid + 1;\n"
+                "      b.wait(mem_fence::global);\n" // both subgroups wait, both are sync
+                       // local and global id of some other work-item within work-item subgroup
+                "      size_t other_lid_same_subgroup = "
+                         "((lid + 1) % get_sub_group_size()) + (get_sub_group_id() * get_sub_group_size());\n"
+                "      size_t other_gid_same_subgroup = (gid - lid) + other_lid_same_subgroup;\n"
+                "      temp[other_gid_same_subgroup] = temp[other_gid_same_subgroup] - 1;\n"
+                "      b.wait(mem_fence::global, memory_scope_sub_group);\n" // both subgroups wait, sync only within subgroup
+                "      value = temp[gid];" // temp[gid] should be equal to lid
+                "    }\n"
+                "    else\n"
+                "    {\n"
+                "      value = lid;\n"
+                "    }\n"
+                "  }\n"
+                "  output[gid] = value;\n"
+                "}\n";
+        #endif
+    }
+    size_t get_max_local_size(const cl_kernel kernel,
+                              const cl_device_id device,
+                              const size_t work_group_size, // default work-group size
+                              cl_int& error)
+    {
+        // Query the largest work-group size the device accepts for this kernel.
+        size_t kernel_limit;
+        error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(kernel_limit), &kernel_limit, NULL);
+        RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+
+        // Use the kernel limit unless the requested default is smaller.
+        if(work_group_size < kernel_limit)
+        {
+            return work_group_size;
+        }
+        return kernel_limit;
+    }
+    cl_int execute(const cl_kernel kernel,
+                   const cl_mem output_buffer,
+                   const cl_command_queue queue,
+                   const size_t work_size,
+                   const size_t work_group_size)
+    {
+        // Runs the kernel once with the output buffer as arg 0 and a scratch
+        // global buffer of work_size cl_uint elements as arg 1, then blocks
+        // until the queue has finished.
+        // The scratch buffer is released on every path after its creation;
+        // previously it leaked when argument setup or the enqueue failed.
+        // Returns the status of the failing call, or the OR of the clFinish
+        // and clReleaseMemObject statuses on the normal path.
+        cl_int err;
+        // Get context from queue (the scratch buffer is created in it)
+        cl_context context;
+        err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL);
+        RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo")
+        // create temp buffer
+        auto temp_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * work_size, NULL, &err);
+        RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+        err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+        err |= clSetKernelArg(kernel, 1, sizeof(temp_buffer), &temp_buffer);
+        if(err != CL_SUCCESS)
+        {
+            // Release the scratch buffer before reporting the failure.
+            clReleaseMemObject(temp_buffer);
+            RETURN_ON_CL_ERROR(err, "clSetKernelArg")
+            return err;
+        }
+        err = clEnqueueNDRangeKernel(
+            queue, kernel, 1,
+            NULL, &work_size, &work_group_size,
+            0, NULL, NULL
+        );
+        if(err != CL_SUCCESS)
+        {
+            // Release the scratch buffer before reporting the failure.
+            clReleaseMemObject(temp_buffer);
+            RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel")
+            return err;
+        }
+        err = clFinish(queue);
+        err |= clReleaseMemObject(temp_buffer);
+        return err;
+    }
+struct global_local_fence_named_barrier_test : public work_group_named_barrier_test_base
+    std::string str()
+    {
+        // Test name; the runner prints it in its pass/fail messages.
+        const std::string name("global_local_fence");
+        return name;
+    }
+    // Return value that is expected to be in output_buffer[i]
+    cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size)
+    {
+        // Element i is expected to hold i modulo the work-group size; the
+        // sub-group size plays no part in the expectation.
+        const size_t position_in_group = i % work_group_size;
+        return static_cast<cl_uint>(position_in_group);
+    }
+    // At the end every work-item writes its local id to output[work-item-global-id].
+    std::string generate_program()
+    {
+        // Returns the kernel source as a string. In the OpenCL C++ variant each
+        // participating work-item stores, for a *different* work-item, that
+        // work-item's global id into local memory and its local id into the
+        // global `temp` buffer, waits on a named barrier with both local and
+        // global fences, and then takes temp[gid] only if its own local-memory
+        // slot holds its global id (otherwise value stays 0). With more than
+        // two sub-groups only sub-groups 0 and 1 use the barrier; the
+        // remaining work-items write their local id directly.
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            // OpenCL C stand-in: no barrier, just writes the local id.
+            return
+                "__kernel void " + this->get_kernel_name() + "(global uint * output, "
+                                                              "global uint * temp,"
+                                                              "local uint * lmem)\n"
+                "{\n"
+                "size_t gid = get_global_id(0);\n"
+                "output[gid] = get_local_id(0);\n"
+                "}\n";
+        #else
+            return
+                "#define cl_khr_subgroup_named_barrier\n"
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_synchronization>\n"
+                "using namespace cl;\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output, "
+                                                              "global_ptr<uint[]> temp,"
+                                                              "local_ptr<uint[]> lmem)\n"
+                "{\n\n"
+                "  local<work_group_named_barrier> a(1);\n"
+                "  local<work_group_named_barrier> b(2);\n"
+                "  size_t gid = get_global_id(0);\n"
+                "  size_t lid = get_local_id(0);\n"
+                "  size_t value = 0;\n"
+                "  if(get_num_sub_groups() == 1)\n"
+                "  {\n"
+                "    size_t other_lid = (lid + 1) % get_enqueued_local_size(0);\n"
+                "    size_t other_gid = (gid - lid) + other_lid;\n"
+                "    lmem[other_lid] = other_gid;\n"
+                "    temp[other_gid] = other_lid;\n"
+                "    a.wait(mem_fence::local | mem_fence::global);\n"
+                "    if(lmem[lid] == gid) value = temp[gid];\n"
+                "  }\n"
+                "  else if(get_num_sub_groups() == 2)\n"
+                "  {\n"
+                "    size_t other_lid = (lid + get_max_sub_group_size()) % get_enqueued_local_size(0);\n"
+                "    size_t other_gid = (gid - lid) + other_lid;\n"
+                "    lmem[other_lid] = other_gid;\n"
+                "    temp[other_gid] = other_lid;\n"
+                "    b.wait(mem_fence::local | mem_fence::global);\n"
+                "    if(lmem[lid] == gid) value = temp[gid];\n"
+                "  }\n"
+                "  else if(get_num_sub_groups() > 2)\n"
+                "  {\n"
+                "    if(get_sub_group_id() < 2)\n"
+                "    {\n"
+                "      const size_t two_first_subgroups = 2 * get_max_sub_group_size();"
+                       // local and global id of some work-item outside of work-item subgroup,
+                       // but within subgroups 0 and 1.
+                "      size_t other_lid = (lid + get_max_sub_group_size()) % two_first_subgroups;\n"
+                "      size_t other_gid = (gid - lid) + other_lid;\n"
+                "      lmem[other_lid] = other_gid;\n"
+                "      temp[other_gid] = other_lid;\n"
+                "      b.wait(mem_fence::local | mem_fence::global);\n"
+                "      if(lmem[lid] == gid) value = temp[gid];\n"
+                "    }\n"
+                "    else\n"
+                "    {\n"
+                "      value = lid;\n"
+                "    }\n"
+                "  }\n"
+                "  output[gid] = value;\n"
+                "}\n";
+        #endif
+    }
+    size_t get_max_local_size(const cl_kernel kernel,
+                              const cl_device_id device,
+                              const size_t work_group_size, // default work-group size
+                              cl_int& error)
+    {
+        // The local-memory argument (index 2) is sized at one cl_uint per
+        // work-item and CL_KERNEL_WORK_GROUP_SIZE is re-queried after each
+        // resize. Start from the default size and halve the candidate until
+        // the kernel reports it can run at that size, or the candidate is 1.
+        size_t candidate = work_group_size;
+        while(candidate > 1)
+        {
+            const size_t local_bytes = candidate * sizeof(cl_uint);
+            error = clSetKernelArg(kernel, 2, local_bytes, NULL);
+            RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+
+            size_t kernel_limit;
+            error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(kernel_limit), &kernel_limit, NULL);
+            RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+
+            if(candidate <= kernel_limit)
+            {
+                break;
+            }
+            candidate /= 2;
+        }
+        return candidate;
+    }
+    cl_int execute(const cl_kernel kernel,
+                   const cl_mem output_buffer,
+                   const cl_command_queue queue,
+                   const size_t work_size,
+                   const size_t work_group_size)
+    {
+        // Runs the kernel once with the output buffer as arg 0, a scratch
+        // global buffer of work_size cl_uint elements as arg 1 and one cl_uint
+        // of local memory per work-item as arg 2, then blocks until the queue
+        // has finished.
+        // The scratch buffer is released on every path after its creation;
+        // previously it leaked when argument setup or the enqueue failed.
+        // Returns the status of the failing call, or the OR of the clFinish
+        // and clReleaseMemObject statuses on the normal path.
+        cl_int err;
+        // Get context from queue (the scratch buffer is created in it)
+        cl_context context;
+        err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL);
+        RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo")
+        // create temp buffer
+        auto temp_buffer = clCreateBuffer(
+            context, (cl_mem_flags)(CL_MEM_READ_WRITE),
+            sizeof(cl_uint) * work_size, NULL, &err
+        );
+        RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+        err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+        err |= clSetKernelArg(kernel, 1, sizeof(temp_buffer), &temp_buffer);
+        // NULL arg_value with a non-zero size requests local memory.
+        err |= clSetKernelArg(kernel, 2, work_group_size * sizeof(cl_uint), NULL);
+        if(err != CL_SUCCESS)
+        {
+            // Release the scratch buffer before reporting the failure.
+            clReleaseMemObject(temp_buffer);
+            RETURN_ON_CL_ERROR(err, "clSetKernelArg")
+            return err;
+        }
+        err = clEnqueueNDRangeKernel(
+            queue, kernel, 1,
+            NULL, &work_size, &work_group_size,
+            0, NULL, NULL
+        );
+        if(err != CL_SUCCESS)
+        {
+            // Release the scratch buffer before reporting the failure.
+            clReleaseMemObject(temp_buffer);
+            RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel")
+            return err;
+        }
+        err = clFinish(queue);
+        err |= clReleaseMemObject(temp_buffer);
+        return err;
+    }
+// ------------------------------------------------------------------------------
+// -------------------------- RUN TESTS -----------------------------------------
+// ------------------------------------------------------------------------------
+// Runs the three named-barrier fence tests (local, global, global+local)
+// through RUN_WG_NAMED_BARRIER_TEST_MACRO. Unless one of the development
+// build modes is defined, the tests are skipped with CL_SUCCESS when the
+// device lacks cl_khr_subgroup_named_barrier, and the run fails when the
+// device reports fewer than 8 named barriers.
+// NOTE(review): the line naming this test case and the `#endif` closing the
+// `#if` below are missing from this text.
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+#if !(defined(DEVELOPMENT) && (defined(USE_OPENCLC_KERNELS) || defined(ONLY_SPIRV_COMPILATION)))
+    if(!is_extension_available(device, "cl_khr_subgroup_named_barrier"))
+    {
+        log_info("SKIPPED: Extension `cl_khr_subgroup_named_barrier` is not supported. Skipping tests.\n");
+        return CL_SUCCESS;
+    }
+    // An implementation shall support at least 8 named barriers per work-group. The exact
+    // maximum number can be queried using clGetDeviceInfo with CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR
+    // from the OpenCL 2.2 Extension Specification.
+    cl_uint named_barrier_count;
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR, sizeof(cl_uint), &named_barrier_count, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+    if(named_barrier_count < 8)
+    {
+        RETURN_ON_ERROR_MSG(-1, "Maximum number of named barriers must be at least 8.");
+    }
+    // Each macro invocation runs one test object and ORs its result into
+    // `error` (per the macro body in common.hpp).
+    RUN_WG_NAMED_BARRIER_TEST_MACRO(local_fence_named_barrier_test())
+    RUN_WG_NAMED_BARRIER_TEST_MACRO(global_fence_named_barrier_test())
+    RUN_WG_NAMED_BARRIER_TEST_MACRO(global_local_fence_named_barrier_test())
+    // Collapse any accumulated failure bits into the single value -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }
+    return error;
+} // namespace
diff --git a/test_conformance/clcpp/synchronization/named_barrier/test_spec_example.hpp b/test_conformance/clcpp/synchronization/named_barrier/test_spec_example.hpp
new file mode 100644
index 0000000..c80ab71
--- /dev/null
+++ b/test_conformance/clcpp/synchronization/named_barrier/test_spec_example.hpp
@@ -0,0 +1,323 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "common.hpp"
+namespace named_barrier {
+// ------------------------------------------------------------------------------
+// ----------------------- SPECIFICATION EXAMPLE TEST----------------------------
+// ------------------------------------------------------------------------------
+// This test is based on the example in OpenCL C++ 1.0 specification (OpenCL C++
+// Standard Library > Synchronization Functions > Named barriers > wait).
+struct spec_example_work_group_named_barrier_test : public work_group_named_barrier_test_base
+    // Short identifier of this test case.
+    std::string str()
+    {
+        return std::string("spec_example");
+    }
+    // Expected content of output_buffer[i]: the element's own index.
+    // work_group_size and mas_sub_group_size do not influence the result.
+    cl_uint operator()(size_t i, size_t work_group_size, size_t mas_sub_group_size)
+    {
+        const cl_uint expected = static_cast<cl_uint>(i);
+        return expected;
+    }
+    // Returns the source of the test kernel as a string. At the end every work-item
+    // writes its global id to output[work-item-global-id].
+    // Two variants exist: an OpenCL C imitation (DEVELOPMENT with USE_OPENCLC_KERNELS)
+    // that replaces named barriers by work_group_barrier calls, and the OpenCL C++
+    // kernel that synchronizes subgroups through three named barriers a(4), b(2), c(2).
+    std::string generate_program()
+    {
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            return
+                // In OpenCL C kernel we imitate subgroups by partitioning work-group (based on
+                // local ids of work-items), work_group_named_barrier.wait(..) calls are replaced
+                // with work_group_barriers.
+                "__kernel void " + this->get_kernel_name() + "(global uint *output, "
+                                                              "global uint * temp, "
+                                                              "local uint * lmem)\n"
+                "{\n"
+                "size_t gid = get_global_id(0);\n"
+                "size_t lid = get_local_id(0);\n"
+                // We divide work-group into ranges:
+                // [0; e_wg)[e_wg; q_wg)[q_wg; 3 * e_wg)[3 * e_wg; h_wg)[h_wg; get_local_size(0) - 1]
+                // to simulate 8 subgroups
+                "size_t h_wg = get_local_size(0) / 2;\n" // half of work-group
+                "size_t q_wg = get_local_size(0) / 4;\n" // quarter
+                "size_t e_wg = get_local_size(0) / 8;\n" // one-eighth
+                "if(lid < h_wg) lmem[lid] = gid;\n" // [0; h_wg)
+                "else           temp[gid] = gid;\n" // [h_wg; get_local_size(0) - 1]
+                "work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
+                "size_t other_lid = (lid + q_wg) % h_wg;\n"
+                "size_t value = 0;\n"
+                "if(lmem[other_lid] == ((gid - lid) + other_lid)){\n"
+                "     value = gid;\n"
+                "}\n"
+                "work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
+                "if(lid < q_wg){\n" // [0; q_wg)
+                "    if(lid < e_wg) lmem[lid + e_wg] = gid;\n" // [0; e_wg)
+                "    else           lmem[lid - e_wg] = gid;\n" // [e_wg; q_wg)
+                "}\n"
+                "else if(lid < h_wg) {\n" // [q_wg; h_wg)
+                "    if(lid < (3 * e_wg)) lmem[lid + e_wg] = gid;\n" // [q_wg; q_wg + e_wg)
+                "    else                 lmem[lid - e_wg] = gid;\n" // [q_wg + e_wg; h_wg)
+                "}\n"
+                "work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
+                "if(lid < q_wg){\n" // [0; q_wg)
+                "    output[gid + q_wg] = lmem[lid];\n"
+                "}\n"
+                "else if(lid < h_wg) {\n" // [q_wg; h_wg)
+                "    output[gid - q_wg] = lmem[lid];\n"
+                "}\n"
+                "work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n"
+                "if(lid < q_wg){\n" // [0; q_wg)
+                "    if(lid < e_wg) temp[gid] = output[gid + (3 * e_wg)];\n" // [0; e_wg)
+                "    else           temp[gid] = output[gid + e_wg];\n" // [e_wg; q_wg)
+                "}\n"
+                "else if(lid < h_wg) {\n" // [q_wg; h_wg)
+                "    if(lid < (3 * e_wg)) temp[gid] = output[gid - e_wg];\n"  // [q_wg; q_wg + e_wg)
+                "    else                 temp[gid] = output[gid - (3 * e_wg)];\n"  // [q_wg + e_wg; h_wg)
+                "}\n"
+                "work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n"
+                "output[gid] = temp[gid];\n"
+                "}\n";
+        #else
+            return
+                "#define cl_khr_subgroup_named_barrier\n"
+                "#include <opencl_memory>\n"
+                "#include <opencl_work_item>\n"
+                "#include <opencl_synchronization>\n"
+                "using namespace cl;\n"
+                "void b_function(work_group_named_barrier &b, size_t value, local_ptr<uint[]> lmem)\n"
+                "{\n\n"
+                "size_t lid = get_local_id(0);\n"
+                // Work-items from the 1st subgroup write to local memory that will be
+                // later read by the 0th subgroup, and the other way around - 0th subgroup
+                // writes what 1st subgroup will later read.
+                // b.wait(mem_fence::local) should provide sync between those two subgroups.
+                "if(get_sub_group_id() < 1) lmem[lid + get_max_sub_group_size()] = value;\n"
+                "else                       lmem[lid - get_max_sub_group_size()] = value;\n"
+                "b.wait(mem_fence::local);\n\n" // sync writes to lmem for 2 subgroups (ids: 0, 1)
+                "}\n"
+                "__kernel void " + this->get_kernel_name() + "(global_ptr<uint[]> output, "
+                                                              "global_ptr<uint[]> temp, "
+                                                              "local_ptr<uint[]> lmem)\n"
+                "{\n\n"
+                "local<work_group_named_barrier> a(4);\n"
+                "local<work_group_named_barrier> b(2);\n"
+                "local<work_group_named_barrier> c(2);\n"
+                "size_t gid = get_global_id(0);\n"
+                "size_t lid = get_local_id(0);\n"
+                "if(get_sub_group_id() < 4)"
+                "{\n"
+                "    lmem[lid] = gid;\n"
+                "    a.wait(mem_fence::local);\n" // sync writes to lmem for 4 subgroups (ids: 0, 1, 2, 3)
+                     // Now all four subgroups should see changes in lmem.
+                "    size_t other_lid = (lid + (2 * get_max_sub_group_size())) % (4 * get_max_sub_group_size());\n"
+                "    size_t value = 0;\n"
+                "    if(lmem[other_lid] == ((gid - lid) + other_lid)){\n"
+                "        value = gid;\n"
+                "    }\n"
+                "    a.wait(mem_fence::local);\n" // sync reads from lmem for 4 subgroups (ids: 0, 1, 2, 3)
+                "    if(get_sub_group_id() < 2)" // ids: 0, 1
+                "    {\n"
+                "        b_function(b, value, lmem);\n"
+                "    }\n"
+                "    else" // ids: 2, 3
+                "    {\n"
+                         // Work-items from the 2nd subgroup write to local memory that will be
+                         // later read by the 3rd subgroup, and the other way around - 3rd subgroup
+                         // writes what 2nd subgroup will later read.
+                         // c.wait(mem_fence::local) should provide sync between those two subgroups.
+                "        if(get_sub_group_id() < 3) lmem[lid + get_max_sub_group_size()] = value ;\n"
+                "        else                       lmem[lid - get_max_sub_group_size()] = value;\n"
+                "        c.wait(mem_fence::local);\n" // sync writes to lmem for 2 subgroups (ids: 2, 3)
+                "    }\n"
+                     // Now (0, 1) are in sync (local mem), and (2, 3) are in sync (local mem).
+                     // However, subgroups (0, 1) are not in sync with (2, 3).
+                "    if(get_sub_group_id() < 4) {\n" // ids: 0, 1, 2, 3
+                "        if(get_sub_group_id() < 2) output[gid + (2 * get_max_sub_group_size())] = lmem[lid];\n"
+                "        else                       output[gid - (2 * get_max_sub_group_size())] = lmem[lid];\n"
+                "        a.wait(mem_fence::global);\n" // sync writes to global memory (output)
+                                                       // for 4 subgroups (0, 1, 2, 3)
+                "    }\n"
+                "}\n"
+                "else {\n" // subgroups with id >= 4
+                "    temp[gid] = gid;\n"
+                "}\n"
+                // Now (0, 1, 2, 3) are in sync (global mem)
+                "if(get_sub_group_id() < 2) {\n"
+                "    if(get_sub_group_id() < 1) temp[gid] = output[gid + (3 * get_max_sub_group_size())];\n"
+                "    else                       temp[gid] = output[gid + (get_max_sub_group_size())];\n"
+                "}\n"
+                "else if(get_sub_group_id() < 4) {\n"
+                "    if(get_sub_group_id() < 3) temp[gid] = output[gid - (get_max_sub_group_size())];\n"
+                "    else                       temp[gid] = output[gid - (3 * get_max_sub_group_size())];\n"
+                "}\n"
+                // Synchronize the entire work-group (in terms of accesses to global memory)
+                "work_group_barrier(mem_fence::global);\n"
+                "output[gid] = temp[gid];\n\n"
+                "}\n";
+        #endif
+    }
+    // Picks the local (work-group) size used to launch the test kernel.
+    //
+    //   kernel, device   - kernel and device that are queried
+    //   work_group_size  - default work-group size; the search starts from it
+    //   error            - receives the status of the last OpenCL call made here
+    //
+    // Returns the chosen local size, or 0 when the implementation reports no
+    // one-dimensional local size that yields the requested number of subgroups.
+    // NOTE(review): on failure the RETURN_ON_* macros leave this function early; the
+    // value they return is determined by their definition - confirm for size_t.
+    size_t get_max_local_size(const cl_kernel kernel,
+                              const cl_device_id device,
+                              const size_t work_group_size, // default work-group size
+                              cl_int& error)
+    {
+        // Set size of the local memory, we need to do this to correctly calculate
+        // max possible work-group size. The candidate size is halved until the
+        // kernel's CL_KERNEL_WORK_GROUP_SIZE admits it.
+        // NOTE(review): execute() sizes this argument as work_group_size * sizeof(cl_uint);
+        // confirm that the smaller size used for the estimate is intended.
+        size_t wg_size;
+        for(wg_size = work_group_size; wg_size > 1; wg_size /= 2)
+        {
+            error = clSetKernelArg(kernel, 2, ((wg_size / 2) + 1) * sizeof(cl_uint), NULL);
+            RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+            size_t max_wg_size;
+            error = clGetKernelWorkGroupInfo(
+                kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL
+            );
+            // Same reporting macro as every other OpenCL API call in this file.
+            RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+            if(max_wg_size >= wg_size) break;
+        }
+        // -----------------------------------------------------------------------------------
+        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+        // -----------------------------------------------------------------------------------
+        #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+            // make sure wg_size is a multiple of 8
+            if(wg_size % 8 > 0) wg_size -= (wg_size % 8);
+            return wg_size;
+        #else
+            // make sure that wg_size will produce at least min_num_sub_groups
+            // subgroups in each work-group
+            size_t local_size[3] = { 1, 1, 1 };
+            size_t min_num_sub_groups = 8;
+            error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
+                                            sizeof(size_t), &min_num_sub_groups,
+                                            sizeof(size_t) * 3, &local_size, NULL);
+            RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
+            if (local_size[0] == 0 || local_size[1] != 1 || local_size[2] != 1)
+            {
+                // min_num_sub_groups is only passed as input above and stays 8, so
+                // this inner branch is not taken with the current constant.
+                if(min_num_sub_groups == 1)
+                {
+                    RETURN_ON_ERROR_MSG(-1, "Can't produce local size with one subgroup")
+                }
+                return 0;
+            }
+            // Never exceed the size found by the local-memory search above.
+            local_size[0] = (std::min)(wg_size, local_size[0]);
+            // double-check
+            size_t sub_group_count_for_ndrange;
+            error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
+                                            sizeof(size_t) * 3, local_size,
+                                            sizeof(size_t), &sub_group_count_for_ndrange, NULL);
+            RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
+            if (sub_group_count_for_ndrange < min_num_sub_groups)
+            {
+                // Both values are size_t, hence the %zu conversions.
+                RETURN_ON_ERROR_MSG(-1,
+                    "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE did not return correct value (expected >=%zu, got %zu)",
+                    min_num_sub_groups, sub_group_count_for_ndrange
+                )
+            }
+            return local_size[0];
+        #endif
+    }
+    // Launches the test kernel once and waits for it to finish.
+    //
+    //   kernel          - kernel built from generate_program()
+    //   output_buffer   - buffer bound to kernel argument 0
+    //   queue           - command queue used for the launch; its context is
+    //                     queried to create the scratch buffer
+    //   work_size       - global work size (also the element count of the scratch buffer)
+    //   work_group_size - local work size (also the element count of the local array)
+    //
+    // Returns the combined status of clFinish and the scratch buffer release, or
+    // leaves early through RETURN_ON_CL_ERROR when an earlier call fails.
+    cl_int execute(const cl_kernel kernel,
+                   const cl_mem output_buffer,
+                   const cl_command_queue queue,
+                   const size_t work_size,
+                   const size_t work_group_size)
+    {
+        cl_int err;
+        // Get context from queue
+        cl_context context;
+        err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL);
+        RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo")
+        // create temp buffer
+        auto temp_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * work_size, NULL, &err);
+        RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+        err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+        err |= clSetKernelArg(kernel, 1, sizeof(temp_buffer), &temp_buffer);
+        err |= clSetKernelArg(kernel, 2, work_group_size * sizeof(cl_uint), NULL);
+        if(err != CL_SUCCESS)
+        {
+            // The macro below returns on failure; release the scratch buffer first
+            // so it is not leaked.
+            clReleaseMemObject(temp_buffer);
+        }
+        RETURN_ON_CL_ERROR(err, "clSetKernelArg")
+        err = clEnqueueNDRangeKernel(
+            queue, kernel, 1,
+            NULL, &work_size, &work_group_size,
+            0, NULL, NULL
+        );
+        if(err != CL_SUCCESS)
+        {
+            // Same as above: do not leak the scratch buffer on the early return.
+            clReleaseMemObject(temp_buffer);
+        }
+        RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel")
+        err = clFinish(queue);
+        err |= clReleaseMemObject(temp_buffer);
+        return err;
+    }
+// ------------------------------------------------------------------------------
+// -------------------------- RUN TESTS -----------------------------------------
+// ------------------------------------------------------------------------------
+// Entry point of the specification-example named barrier test.
+// Reports a skip (CL_SUCCESS) when the device lacks cl_khr_subgroup_named_barrier,
+// otherwise runs spec_example_work_group_named_barrier_test and maps any
+// non-CL_SUCCESS `error` to -1.
+// NOTE(review): RUN_WG_NAMED_BARRIER_TEST_MACRO is defined elsewhere; presumably it
+// updates `error`/`last_error` declared below - confirm against its definition.
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+#if !(defined(DEVELOPMENT) && (defined(USE_OPENCLC_KERNELS) || defined(ONLY_SPIRV_COMPILATION)))
+    if(!is_extension_available(device, "cl_khr_subgroup_named_barrier"))
+    {
+        log_info("SKIPPED: Extension `cl_khr_subgroup_named_barrier` is not supported. Skipping tests.\n");
+        return CL_SUCCESS;
+    }
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    RUN_WG_NAMED_BARRIER_TEST_MACRO(spec_example_work_group_named_barrier_test())
+    // Collapse whatever error code is left into the generic failure value -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }
+    return error;
+} // namespace
diff --git a/test_conformance/clcpp/synchronization/test_sub_group_barrier.hpp b/test_conformance/clcpp/synchronization/test_sub_group_barrier.hpp
new file mode 100644
index 0000000..c7074ed
--- /dev/null
+++ b/test_conformance/clcpp/synchronization/test_sub_group_barrier.hpp
@@ -0,0 +1,342 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <sstream>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <random>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+namespace test_sub_group_barrier {
+// Memory fence flavour passed to sub_group_barrier in the generated kernel:
+// `local` selects the local-memory fence, `global` the global-memory fence.
+enum class barrier_type
+    local,
+    global
+// Parameters of one test run:
+//   barrier   - fence flavour used by the generated kernel
+//   max_count - upper bound of the randomly chosen global work size
+//   num_tests - number of randomized kernel launches
+struct test_options
+    barrier_type barrier;
+    size_t max_count;
+    size_t num_tests;
+// Kernel-side helper functions shared by all kernel variants; generate_source()
+// streams this text into the program source ahead of the kernel itself.
+const std::string source_common = R"(
+    // Circular shift of sub-group local ids
+    size_t get_shifted_local_id(int sub_group_local_id_delta)
+    {
+        const int sub_group_size = (int)get_sub_group_size();
+        return (get_local_id(0) - get_sub_group_local_id()) +
+            (((int)get_sub_group_local_id() + sub_group_local_id_delta) % sub_group_size + sub_group_size) % sub_group_size;
+    }
+    // Get global ids from shifted local ids
+    size_t get_shifted_global_id(int sub_group_local_id_delta)
+    {
+        return get_group_id(0) * get_enqueued_local_size(0) + get_shifted_local_id(sub_group_local_id_delta);
+    }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Builds the OpenCL C source of kernel `test` (variant used when DEVELOPMENT and
+// USE_OPENCLC_KERNELS are defined). The text consists of the cl_khr_subgroups
+// pragma, source_common, and a kernel chosen by options.barrier:
+//   global - every work-item accumulates directly in `output`
+//   local  - every work-item accumulates in the local array `values` and copies
+//            its element to `output` at the end
+// For any other barrier value only the common prefix is returned.
+std::string generate_source(test_options options)
+    std::stringstream s;
+    s << R"(
+    #pragma OPENCL EXTENSION cl_khr_subgroups : enable
+    )";
+    s << source_common;
+    if (options.barrier == barrier_type::global)
+    {
+        s << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global long *output)
+    {
+        const size_t gid = get_shifted_global_id(0);
+        output[gid] = gid;
+        sub_group_barrier(CLK_GLOBAL_MEM_FENCE);
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_gid = get_shifted_global_id(i);
+            output[other_gid] += other_gid;
+            sub_group_barrier(CLK_GLOBAL_MEM_FENCE);
+            output[gid] += gid;
+            sub_group_barrier(CLK_GLOBAL_MEM_FENCE);
+        }
+    }
+    )";
+    }
+    else if (options.barrier == barrier_type::local)
+    {
+        s << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global long *output, local long *values)
+    {
+        const size_t gid = get_shifted_global_id(0);
+        const size_t lid = get_shifted_local_id(0);
+        values[lid] = gid;
+        sub_group_barrier(CLK_LOCAL_MEM_FENCE);
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_lid = get_shifted_local_id(i);
+            const size_t other_gid = get_shifted_global_id(i);
+            values[other_lid] += other_gid;
+            sub_group_barrier(CLK_LOCAL_MEM_FENCE);
+            values[lid] += gid;
+            sub_group_barrier(CLK_LOCAL_MEM_FENCE);
+        }
+        output[gid] = values[lid];
+    }
+    )";
+    }
+    return s.str();
+// Builds the OpenCL C++ source of kernel `test`. The text consists of the OpenCL
+// C++ standard library includes, source_common, and a kernel chosen by
+// options.barrier:
+//   global - every work-item accumulates directly in `output`
+//   local  - every work-item accumulates in the local array `values` and copies
+//            its element to `output` at the end
+// For any other barrier value only the common prefix is returned.
+std::string generate_source(test_options options)
+    std::stringstream s;
+    s << R"(
+    #include <opencl_memory>
+    #include <opencl_work_item>
+    #include <opencl_synchronization>
+    using namespace cl;
+    )";
+    s << source_common;
+    if (options.barrier == barrier_type::global)
+    {
+        s << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global_ptr<long[]> output)
+    {
+        const size_t gid = get_shifted_global_id(0);
+        output[gid] = gid;
+        sub_group_barrier(mem_fence::global);
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_gid = get_shifted_global_id(i);
+            output[other_gid] += other_gid;
+            sub_group_barrier(mem_fence::global);
+            output[gid] += gid;
+            sub_group_barrier(mem_fence::global);
+        }
+    }
+    )";
+    }
+    else if (options.barrier == barrier_type::local)
+    {
+        s << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global_ptr<long[]> output, local_ptr<long[]> values)
+    {
+        const size_t gid = get_shifted_global_id(0);
+        const size_t lid = get_shifted_local_id(0);
+        values[lid] = gid;
+        sub_group_barrier(mem_fence::local);
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_lid = get_shifted_local_id(i);
+            const size_t other_gid = get_shifted_global_id(i);
+            values[other_lid] += other_gid;
+            sub_group_barrier(mem_fence::local);
+            values[lid] += gid;
+            sub_group_barrier(mem_fence::local);
+        }
+        output[gid] = values[lid];
+    }
+    )";
+    }
+    return s.str();
+// Builds the sub-group barrier kernel and runs options.num_tests randomized
+// launches of it, verifying every output element after each launch.
+//
+//   device, context, queue - OpenCL objects used for building and launching
+//   options                - fence flavour, upper bound of the global size and
+//                            number of launches
+//
+// Returns CL_SUCCESS when every launch produced the expected values; the
+// RETURN_ON_* macros leave the function early on the first failure.
+// NOTE(review): kernel, program and output_buffer are not released on those early
+// returns.
+int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
+    int error = CL_SUCCESS;
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    if (!is_extension_available(device, "cl_khr_subgroups"))
+    {
+        log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. Skipping tests.\n");
+        return CL_SUCCESS;
+    }
+    cl_program program;
+    cl_kernel kernel;
+    std::string kernel_name = "test";
+    std::string source = generate_source(options);
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name, "-cl-std=CL2.0", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    size_t max_work_group_size;
+    error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+    if (options.barrier == barrier_type::local)
+    {
+        // The local variant needs one cl_long of local memory per work-item (kernel
+        // argument 3 below), so the local size is additionally limited by the local
+        // memory that is left after the kernel's own usage.
+        cl_ulong kernel_local_mem_size;
+        error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL);
+        RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+        cl_ulong device_local_mem_size;
+        error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(device_local_mem_size), &device_local_mem_size, NULL);
+        RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+        max_work_group_size = (std::min<cl_ulong>)(max_work_group_size, (device_local_mem_size - kernel_local_mem_size) / sizeof(cl_long));
+    }
+    // Random launch configuration: global size in [1, max_count], local size in
+    // [1, max_work_group_size], and up to 20 shift steps in each direction.
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<size_t> global_size_dis(1, options.max_count);
+    std::uniform_int_distribution<size_t> local_size_dis(1, max_work_group_size);
+    std::uniform_int_distribution<int> iter_dis(0, 20);
+    for (size_t test = 0; test < options.num_tests; test++)
+    {
+        const size_t global_size = global_size_dis(gen);
+        const size_t local_size = local_size_dis(gen);
+        const size_t count = global_size;
+        // iter_lo <= 0 <= iter_hi, so the kernel loop runs iter_hi - iter_lo times.
+        const int iter_lo = -iter_dis(gen);
+        const int iter_hi = +iter_dis(gen);
+        cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_long) * count, NULL, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+        error = clSetKernelArg(kernel, 0, sizeof(iter_lo), &iter_lo);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 1, sizeof(iter_hi), &iter_hi);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        if (options.barrier == barrier_type::local)
+        {
+            error = clSetKernelArg(kernel, 3, sizeof(cl_long) * local_size, NULL);
+            RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        }
+        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
+        RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+        // Blocking read (CL_TRUE) of the whole result into host memory.
+        std::vector<cl_long> output(count);
+        error = clEnqueueReadBuffer(
+            queue, output_buffer, CL_TRUE,
+            0, sizeof(cl_long) * count,
+            static_cast<void *>(output.data()),
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+        error = clReleaseMemObject(output_buffer);
+        RETURN_ON_CL_ERROR(error, "clReleaseMemObject")
+        for (size_t gid = 0; gid < count; gid++)
+        {
+            // The kernel stores gid once and then, in each loop iteration, element
+            // gid receives "+= gid" twice: once through a shifted id and once
+            // through the work-item's own id.
+            const long value = output[gid];
+            const long expected = gid + 2 * gid * (iter_hi - iter_lo);
+            if (value != expected)
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "Element %lu has incorrect value. Expected: %ld, got: %ld",
+                    gid, expected, value
+                );
+            }
+        }
+    }
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+// Test entry point: 1000 randomized launches of the sub-group barrier kernel with
+// the global-memory fence; num_elements bounds the global work size.
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    test_options options;
+    options.barrier = barrier_type::global;
+    options.num_tests = 1000;
+    options.max_count = num_elements;
+    return test(device, context, queue, options);
+// Test entry point: 1000 randomized launches of the sub-group barrier kernel with
+// the local-memory fence; num_elements bounds the global work size.
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    test_options options;
+    options.barrier = barrier_type::local;
+    options.num_tests = 1000;
+    options.max_count = num_elements;
+    return test(device, context, queue, options);
+} // namespace
diff --git a/test_conformance/clcpp/synchronization/test_work_group_barrier.hpp b/test_conformance/clcpp/synchronization/test_work_group_barrier.hpp
new file mode 100644
index 0000000..aa7fbd2
--- /dev/null
+++ b/test_conformance/clcpp/synchronization/test_work_group_barrier.hpp
@@ -0,0 +1,330 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <sstream>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <random>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+namespace test_work_group_barrier {
+// Memory fence flavour passed to work_group_barrier in the generated kernel:
+// `local` selects the local-memory fence, `global` the global-memory fence.
+enum class barrier_type
+    local,
+    global
+// Parameters of one test run:
+//   barrier   - fence flavour used by the generated kernel
+//   max_count - upper bound of the randomly chosen global work size
+//   num_tests - presumably the number of randomized kernel launches - TODO confirm
+struct test_options
+    barrier_type barrier;
+    size_t max_count;
+    size_t num_tests;
+// Kernel-side helper functions shared by all kernel variants; generate_source()
+// streams this text into the program source ahead of the kernel itself.
+const std::string source_common = R"(
+    // Circular shift of local ids
+    size_t get_shifted_local_id(int local_id_delta)
+    {
+        const int local_size = (int)get_local_size(0);
+        return (((int)get_local_id(0) + local_id_delta) % local_size + local_size) % local_size;
+    }
+    // Get global ids from shifted local ids
+    size_t get_shifted_global_id(int local_id_delta)
+    {
+        return get_group_id(0) * get_enqueued_local_size(0) + get_shifted_local_id(local_id_delta);
+    }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Builds the OpenCL C source of kernel `test` (variant used when DEVELOPMENT and
+// USE_OPENCLC_KERNELS are defined). The text consists of source_common followed
+// by a kernel chosen by options.barrier:
+//   global - every work-item accumulates directly in `output`
+//   local  - every work-item accumulates in the local array `values` and copies
+//            its element to `output` at the end
+// For any other barrier value only source_common is returned.
+std::string generate_source(test_options options)
+    std::stringstream s;
+    s << source_common;
+    if (options.barrier == barrier_type::global)
+    {
+        s << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global long *output)
+    {
+        const size_t gid = get_shifted_global_id(0);
+        output[gid] = gid;
+        work_group_barrier(CLK_GLOBAL_MEM_FENCE);
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_gid = get_shifted_global_id(i);
+            output[other_gid] += other_gid;
+            work_group_barrier(CLK_GLOBAL_MEM_FENCE);
+            output[gid] += gid;
+            work_group_barrier(CLK_GLOBAL_MEM_FENCE);
+        }
+    }
+    )";
+    }
+    else if (options.barrier == barrier_type::local)
+    {
+        s << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global long *output, local long *values)
+    {
+        const size_t gid = get_shifted_global_id(0);
+        const size_t lid = get_shifted_local_id(0);
+        values[lid] = gid;
+        work_group_barrier(CLK_LOCAL_MEM_FENCE);
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_lid = get_shifted_local_id(i);
+            const size_t other_gid = get_shifted_global_id(i);
+            values[other_lid] += other_gid;
+            work_group_barrier(CLK_LOCAL_MEM_FENCE);
+            values[lid] += gid;
+            work_group_barrier(CLK_LOCAL_MEM_FENCE);
+        }
+        output[gid] = values[lid];
+    }
+    )";
+    }
+    return s.str();
+// Builds the OpenCL C++ source of kernel `test`. The text consists of the OpenCL
+// C++ standard library includes, source_common, and a kernel chosen by
+// options.barrier:
+//   global - every work-item accumulates directly in `output`
+//   local  - every work-item accumulates in the local array `values` and copies
+//            its element to `output` at the end
+// For any other barrier value only the common prefix is returned.
+std::string generate_source(test_options options)
+    std::stringstream s;
+    s << R"(
+    #include <opencl_memory>
+    #include <opencl_work_item>
+    #include <opencl_synchronization>
+    using namespace cl;
+    )";
+    s << source_common;
+    if (options.barrier == barrier_type::global)
+    {
+        s << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global_ptr<long[]> output)
+    {
+        const size_t gid = get_shifted_global_id(0);
+        output[gid] = gid;
+        work_group_barrier(mem_fence::global);
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_gid = get_shifted_global_id(i);
+            output[other_gid] += other_gid;
+            work_group_barrier(mem_fence::global);
+            output[gid] += gid;
+            work_group_barrier(mem_fence::global);
+        }
+    }
+    )";
+    }
+    else if (options.barrier == barrier_type::local)
+    {
+        s << R"(
+    kernel void test(const int iter_lo, const int iter_hi, global_ptr<long[]> output, local_ptr<long[]> values)
+    {
+        const size_t gid = get_shifted_global_id(0);
+        const size_t lid = get_shifted_local_id(0);
+        values[lid] = gid;
+        work_group_barrier(mem_fence::local);
+        for (int i = iter_lo; i < iter_hi; i++)
+        {
+            const size_t other_lid = get_shifted_local_id(i);
+            const size_t other_gid = get_shifted_global_id(i);
+            values[other_lid] += other_gid;
+            work_group_barrier(mem_fence::local);
+            values[lid] += gid;
+            work_group_barrier(mem_fence::local);
+        }
+        output[gid] = values[lid];
+    }
+    )";
+    }
+    return s.str();
+int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) // Runs options.num_tests randomized launches of the barrier kernel and checks every output element; returns the first failing code (-1 on a value mismatch).
+    int error = CL_SUCCESS;
+    cl_program program;
+    cl_kernel kernel;
+    std::string kernel_name = "test";
+    std::string source = generate_source(options);
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation. NOTE(review): the #if/#else/#endif lines that should select one of the three build paths below are not present in this hunk — confirm against upstream.
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name, "-cl-std=CL2.0", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    size_t max_work_group_size;
+    error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL);
+    RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+    if (options.barrier == barrier_type::local) // the local variant needs one cl_long of local memory per work-item (see kernel argument 3 below)
+    {
+        cl_ulong kernel_local_mem_size;
+        error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL);
+        RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo")
+        cl_ulong device_local_mem_size;
+        error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(device_local_mem_size), &device_local_mem_size, NULL);
+        RETURN_ON_CL_ERROR(error, "clGetDeviceInfo")
+        max_work_group_size = (std::min<cl_ulong>)(max_work_group_size, (device_local_mem_size - kernel_local_mem_size) / sizeof(cl_long)); // cap by the number of cl_long values that fit in the local memory the kernel does not already use
+    }
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<size_t> global_size_dis(1, options.max_count); // global size in [1, options.max_count]
+    std::uniform_int_distribution<size_t> local_size_dis(1, max_work_group_size); // local size in [1, max_work_group_size]; NOTE(review): a cap of 0 above would make this range invalid
+    std::uniform_int_distribution<int> iter_dis(0, 20);
+    for (size_t test = 0; test < options.num_tests; test++)
+    {
+        const size_t global_size = global_size_dis(gen);
+        const size_t local_size = local_size_dis(gen);
+        const size_t count = global_size;
+        const int iter_lo = -iter_dis(gen); // in [-20, 0]
+        const int iter_hi = +iter_dis(gen); // in [0, 20], so iter_hi - iter_lo is never negative
+        cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_long) * count, NULL, &error);
+        RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+        error = clSetKernelArg(kernel, 0, sizeof(iter_lo), &iter_lo);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 1, sizeof(iter_hi), &iter_hi);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer);
+        RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        if (options.barrier == barrier_type::local)
+        {
+            error = clSetKernelArg(kernel, 3, sizeof(cl_long) * local_size, NULL); // size-only argument (NULL value) for the kernel's local_ptr parameter
+            RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+        }
+        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
+        RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+        std::vector<cl_long> output(count);
+        error = clEnqueueReadBuffer(
+            queue, output_buffer, CL_TRUE,
+            0, sizeof(cl_long) * count,
+            static_cast<void *>(, // NOTE(review): the host pointer expression is missing in this hunk — confirm against upstream
+            0, NULL, NULL
+        );
+        RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+        error = clReleaseMemObject(output_buffer);
+        RETURN_ON_CL_ERROR(error, "clReleaseMemObject")
+        for (size_t gid = 0; gid < count; gid++)
+        {
+            const long value = output[gid];
+            const long expected = gid + 2 * gid * (iter_hi - iter_lo); // matches the kernel: initial gid plus two `+= gid` per loop iteration
+            if (value != expected) // NOTE(review): the return below leaves kernel and program unreleased
+            {
+                RETURN_ON_ERROR_MSG(-1,
+                    "Element %lu has incorrect value. Expected: %ld, got: %ld",
+                    gid, expected, value
+                );
+            }
+        }
+    }
+    clReleaseKernel(kernel); // only reached when every iteration passed; the RETURN_ON_* early returns above skip these releases
+    clReleaseProgram(program);
+    return error;
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // Test entry point (its name is not visible in this hunk): runs test() with the global-barrier kernel.
+    test_options options;
+    options.barrier = barrier_type::global;
+    options.num_tests = 1000; // number of randomized launches performed by test()
+    options.max_count = num_elements; // upper bound for the random global size
+    return test(device, context, queue, options);
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) // Test entry point (its name is not visible in this hunk): runs test() with the local-barrier kernel.
+    test_options options;
+    options.barrier = barrier_type::local;
+    options.num_tests = 1000; // number of randomized launches performed by test()
+    options.max_count = num_elements; // upper bound for the random global size
+    return test(device, context, queue, options);
+} // namespace
diff --git a/test_conformance/clcpp/utils_common/errors.hpp b/test_conformance/clcpp/utils_common/errors.hpp
new file mode 100644
index 0000000..c169462
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/errors.hpp
@@ -0,0 +1,134 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <string>
+#include "../harness/errorHelpers.h"
+// ------------- Check OpenCL error helpers (macros) -----------------
+std::string get_cl_error_string(cl_int error) // Returns a printable name for an OpenCL error code; codes without a matching case yield "(unknown error code)".
+#define CASE_CL_ERROR(x) case x: return #x; // expands to a case label that returns the stringized constant name
+    switch (error)
+    {
+        default: return "(unknown error code)"; // NOTE(review): no CASE_CL_ERROR(...) entries are visible in this hunk — confirm the list against upstream
+    }
+#define CHECK_ERROR(x) \
+    if(x != CL_SUCCESS) \
+    { \
+        log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\
+    }
+#define CHECK_ERROR_MSG(x, ...) \
+    if(x != CL_SUCCESS) \
+    { \
+        log_error("ERROR: " __VA_ARGS__);\
+        log_error("\n");\
+        log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\
+    }
+#define RETURN_ON_ERROR(x) \
+    if(x != CL_SUCCESS) \
+    { \
+        log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\
+        return x;\
+    }
+#define RETURN_ON_ERROR_MSG(x, ...) \
+    if(x != CL_SUCCESS) \
+    { \
+        log_error("ERROR: " __VA_ARGS__);\
+        log_error("\n");\
+        log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\
+        return x;\
+    }
+#define RETURN_ON_CL_ERROR(x, cl_func_name) \
+    if(x != CL_SUCCESS) \
+    { \
+        log_error("ERROR: %s failed: %s (%d)\n", cl_func_name, get_cl_error_string(x).c_str(), x);\
+        log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\
+        return x;\
+    }
diff --git a/test_conformance/clcpp/utils_common/is_vector_type.hpp b/test_conformance/clcpp/utils_common/is_vector_type.hpp
new file mode 100644
index 0000000..0232e51
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/is_vector_type.hpp
@@ -0,0 +1,60 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+// is_vector_type<Type>::value is true if Type is an OpenCL
+// vector type; otherwise it is false.
+// Examples:
+// * is_vector_type<cl_float>::value == false
+// * is_vector_type<cl_float4>::value == true
+template<class Type>
+struct is_vector_type // primary template: used for every Type that has no specialization
+    const static bool value = false; // specializations generated by ADD_VECTOR_TYPE set this to true
+#define ADD_VECTOR_TYPE(Type, n) /* specializes is_vector_type for the pasted name Type##n with value == true */ \
+    template<> \
+    struct is_vector_type<Type ## n> \
+    { \
+        const static bool value = true; \
+    };
+#define ADD_VECTOR_TYPES(Type) /* applies ADD_VECTOR_TYPE for the widths 2, 4, 8 and 16 */ \
+    ADD_VECTOR_TYPE(Type, 2) \
+    ADD_VECTOR_TYPE(Type, 4) \
+    ADD_VECTOR_TYPE(Type, 8) \
+    ADD_VECTOR_TYPE(Type, 16)
diff --git a/test_conformance/clcpp/utils_common/kernel_helpers.hpp b/test_conformance/clcpp/utils_common/kernel_helpers.hpp
new file mode 100644
index 0000000..189b823
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/kernel_helpers.hpp
@@ -0,0 +1,50 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+// Creates an OpenCL C++/C program out_program and kernel out_kernel from a single source string by forwarding to create_single_kernel_helper.
+int create_opencl_kernel(cl_context context,
+                         cl_program *out_program, // forwarded to the helper as the program output
+                         cl_kernel *out_kernel, // forwarded to the helper as the kernel output
+                         const char *source, // the one source string handed to the helper
+                         const std::string& kernel_name,
+                         const std::string& build_options = "", // forwarded unchanged as a C string
+                         const bool openclCXX = true) // forwarded unchanged; callers pass false together with "-cl-std=CL2.0" to build OpenCL C kernels
+    return create_single_kernel_helper(
+        context, out_program, out_kernel, 1, &source, // 1 = number of source strings
+        kernel_name.c_str(), build_options.c_str(), openclCXX
+    );
+int create_opencl_kernel(cl_context context, // Convenience overload taking the source as std::string; returns whatever the const char* overload returns.
+                         cl_program *out_program,
+                         cl_kernel *out_kernel,
+                         const std::string& source,
+                         const std::string& kernel_name,
+                         const std::string& build_options = "",
+                         const bool openclCXX = true)
+    return create_opencl_kernel(
+        context, out_program, out_kernel,
+        source.c_str(), kernel_name, build_options, openclCXX // source.c_str() selects the const char* overload
+    );
diff --git a/test_conformance/clcpp/utils_common/make_vector_type.hpp b/test_conformance/clcpp/utils_common/make_vector_type.hpp
new file mode 100644
index 0000000..11b1185
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/make_vector_type.hpp
@@ -0,0 +1,65 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+// make_vector_type<scalar_type, i>::type is the type named scalar_type followed by i (the scalar type itself for i == 1).
+// Examples:
+// * make_vector_type<cl_uint, 8>::type is cl_uint8
+// * make_vector_type<cl_uint, 1>::type is cl_uint
+template<class scalar_type, size_t i>
+struct make_vector_type // primary template: used when no specialization exists for this scalar_type/i pair
+    typedef void type;
+#define ADD_MAKE_VECTOR_TYPE(Type, n) /* maps make_vector_type<Type, n>::type to the pasted name Type##n */ \
+    template<> \
+    struct make_vector_type<Type, n> \
+    { \
+        typedef Type ## n type; \
+    };
+#define ADD_MAKE_VECTOR_TYPES(Type) /* maps width 1 to Type itself, then registers the vector widths listed below */ \
+    template<> \
+    struct make_vector_type<Type, 1> \
+    { \
+        typedef Type type; \
+    }; \
+    ADD_MAKE_VECTOR_TYPE(Type, 2) \
+    ADD_MAKE_VECTOR_TYPE(Type, 3) \
+    ADD_MAKE_VECTOR_TYPE(Type, 4) \
+    ADD_MAKE_VECTOR_TYPE(Type, 8) /* NOTE(review): line continuation with no following macro line in this hunk; a width-16 entry appears to be missing — confirm against upstream */ \
diff --git a/test_conformance/clcpp/utils_common/scalar_type.hpp b/test_conformance/clcpp/utils_common/scalar_type.hpp
new file mode 100644
index 0000000..4c939bb
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/scalar_type.hpp
@@ -0,0 +1,64 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+// scalar_type<Type>::type is the scalar (component) type of Type.
+// Examples:
+// * scalar_type<cl_float>::type is cl_float
+// * scalar_type<cl_float4>::type is cl_float
+template<class Type>
+struct scalar_type // primary template: used when no specialization exists for Type
+    typedef void type;
+#define ADD_VECTOR_TYPE(Type, n) /* maps scalar_type of the pasted name Type##n to Type. NOTE(review): is_vector_type.hpp defines macros with the same names and no #undef is visible in this hunk — confirm */ \
+    template<> \
+    struct scalar_type<Type ## n> \
+    { \
+        typedef Type type; \
+    };
+#define ADD_VECTOR_TYPES(Type) /* maps Type to itself, then applies ADD_VECTOR_TYPE for the widths 2, 4, 8 and 16 */ \
+    template<> \
+    struct scalar_type<Type> \
+    { \
+        typedef Type type; \
+    }; \
+    ADD_VECTOR_TYPE(Type, 2) \
+    ADD_VECTOR_TYPE(Type, 4) \
+    ADD_VECTOR_TYPE(Type, 8) \
+    ADD_VECTOR_TYPE(Type, 16)
diff --git a/test_conformance/clcpp/utils_common/string.hpp b/test_conformance/clcpp/utils_common/string.hpp
new file mode 100644
index 0000000..ad5ac9f
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/string.hpp
@@ -0,0 +1,70 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <string>
+#include <sstream>
+#include <iomanip>
+#include <type_traits>
+#include "is_vector_type.hpp"
+#include "scalar_type.hpp"
+#include "type_name.hpp"
+#include "../common.hpp"
+template<class type>
+std::string format_value(const type& value, // Formats a vector value as "<type_name>{ c0, c1, ... }"; the stream is set to std::scientific with precision 6.
+                         typename std::enable_if<is_vector_type<type>::value>::type* = 0) // enable_if: this overload is viable only when is_vector_type<type>::value is true
+    std::stringstream s;
+    s << type_name<type>() << "{ ";
+    s << std::scientific << std::setprecision(6);
+    for (size_t j = 0; j < vector_size<type>::value; j++) // one pass per vector component, read through value.s[j]
+    {
+        if (j > 0)
+            s << ", ";
+        s << value.s[j];
+    }
+    s << " }";
+    return s.str();
+template<class type>
+std::string format_value(const type& value, // Formats a non-vector value as "<type_name>{ value }"; the stream is set to std::scientific with precision 6.
+                         typename std::enable_if<!is_vector_type<type>::value>::type* = 0) // enable_if: this overload is viable only when is_vector_type<type>::value is false
+    std::stringstream s;
+    s << type_name<type>() << "{ ";
+    s << std::scientific << std::setprecision(6);
+    s << value;
+    s << " }";
+    return s.str();
+// Replaces every occurrence of `from` in `str` with `to`, in place.
+// Scanning resumes right after the inserted text, so occurrences of
+// `from` that appear inside `to` are not replaced again.
+// An empty `from` leaves `str` unchanged: std::string::find matches an
+// empty pattern at every position, so without this guard the loop below
+// would never terminate.
+void replace_all(std::string& str, const std::string& from, const std::string& to)
+    if (from.empty())
+        return;
+    size_t start_pos = 0;
+    while((start_pos = str.find(from, start_pos)) != std::string::npos) {
+        str.replace(start_pos, from.length(), to);
+        start_pos += to.length(); // skip the replacement text
+    }
diff --git a/test_conformance/clcpp/utils_common/type_name.hpp b/test_conformance/clcpp/utils_common/type_name.hpp
new file mode 100644
index 0000000..c66f6e4
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/type_name.hpp
@@ -0,0 +1,65 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+// Returns the type name as spelled in OpenCL device code.
+// Examples: cl_uint -> "uint", cl_float2 -> "float2"
+template<class Type>
+std::string type_name() // primary template: fallback for types without a specialization
+    return "unknown";
+#define ADD_TYPE_NAME(Type, str) /* specializes type_name<Type>() to return the stringized str */ \
+    template<> \
+    std::string type_name<Type>() \
+    { \
+        return #str; \
+    }
+#define ADD_TYPE_NAME2(Type) /* registers the host type cl_<Type> under the name <Type> */ \
+    ADD_TYPE_NAME(cl_ ## Type, Type)
+#define ADD_TYPE_NAME3(Type, x) /* pastes Type and x together and hands the result to ADD_TYPE_NAME2 */ \
+    ADD_TYPE_NAME2(Type ## x)
+#define ADD_TYPE_NAMES(Type) /* registers the scalar name plus the vector names of widths 2, 4, 8 and 16 */ \
+    ADD_TYPE_NAME2(Type) \
+    ADD_TYPE_NAME3(Type, 2) \
+    ADD_TYPE_NAME3(Type, 4) \
+    ADD_TYPE_NAME3(Type, 8) \
+    ADD_TYPE_NAME3(Type, 16)
+#undef ADD_TYPE_NAME3 // the helper macros are removed again here
+#undef ADD_TYPE_NAME2
diff --git a/test_conformance/clcpp/utils_common/type_supported.hpp b/test_conformance/clcpp/utils_common/type_supported.hpp
new file mode 100644
index 0000000..8d4f721
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/type_supported.hpp
@@ -0,0 +1,106 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+// Returns true if type is supported by device; otherwise false.
+template<class Type>
+bool type_supported(cl_device_id device) // primary template: types without a specialization are reported as unsupported
+    (void) device; // the parameter is intentionally unused here
+    return false;
+#define ADD_SUPPORTED_TYPE(Type) /* specializes type_supported<Type> to return true for every device */ \
+    template<> \
+    bool type_supported<Type>(cl_device_id device) \
+    { \
+        (void) device; \
+        return true; \
+    }
+// ulong: supported unless the device reports EMBEDDED_PROFILE without the cles_khr_int64 extension.
+bool type_supported<cl_ulong>(cl_device_id device)
+    // long types do not have to be supported in EMBEDDED_PROFILE.
+    char profile[128];
+    int error;
+    error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL);
+    if (error != CL_SUCCESS)
+    {
+        log_error("ERROR: clGetDeviceInfo failed with CL_DEVICE_PROFILE\n");
+        return false; // a failed profile query is reported as "not supported"
+    }
+    if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0)
+        return is_extension_available(device, "cles_khr_int64");
+    return true; // any other profile string: supported
+// long: same answer as for cl_ulong.
+bool type_supported<cl_long>(cl_device_id device)
+    return type_supported<cl_ulong>(device);
+// double: supported exactly when is_extension_available reports cl_khr_fp64 for the device.
+bool type_supported<cl_double>(cl_device_id device)
+    return is_extension_available(device, "cl_khr_fp64");
+#define ADD_SUPPORTED_VEC_TYPE1(Type, n) /* the pasted type Type##n is supported exactly when the scalar Type is */ \
+    template<> \
+    bool type_supported<Type ## n>(cl_device_id device) \
+    { \
+        return type_supported<Type>(device); \
+    }
+#define ADD_SUPPORTED_VEC_TYPE2(Type) /* NOTE(review): the body of this macro is not visible in this hunk — confirm against upstream */ \
+// ADD_SUPPORTED_VEC_TYPE2(cl_double)
diff --git a/test_conformance/clcpp/utils_common/vector_size.hpp b/test_conformance/clcpp/utils_common/vector_size.hpp
new file mode 100644
index 0000000..4817506
--- /dev/null
+++ b/test_conformance/clcpp/utils_common/vector_size.hpp
@@ -0,0 +1,61 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+// vector_size<Type>::value is 1 if Type is a scalar type; for a vector type
+// it is the number of components in that Type.
+template<class Type>
+struct vector_size // primary template: used for every Type that has no specialization
+    const static size_t value = 1;
+#define ADD_VECTOR_SIZE_TYPE(Type, n) /* specializes vector_size for the pasted name Type##n with value == n */ \
+    template<> \
+    struct vector_size<Type ## n> \
+    { \
+        const static size_t value = n; \
+    };
+#define ADD_VECTOR_SIZE_TYPES(Type) /* registers the scalar Type with value 1, then the vector widths listed below */ \
+    template<> \
+    struct vector_size<Type> \
+    { \
+        const static size_t value = 1; \
+    }; \
+    ADD_VECTOR_SIZE_TYPE(Type, 2) \
+    ADD_VECTOR_SIZE_TYPE(Type, 4) \
+    ADD_VECTOR_SIZE_TYPE(Type, 8) /* NOTE(review): line continuation with no following macro line in this hunk; a width-16 entry appears to be missing — confirm against upstream */ \
diff --git a/test_conformance/clcpp/utils_test/binary.hpp b/test_conformance/clcpp/utils_test/binary.hpp
new file mode 100644
index 0000000..5ff35c9
--- /dev/null
+++ b/test_conformance/clcpp/utils_test/binary.hpp
@@ -0,0 +1,308 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include <algorithm>
+#include <string>
+#include <cmath>
+#include "../common.hpp"
+#include "detail/base_func_type.hpp"
+#include "generate_inputs.hpp"
+#include "compare.hpp"
+template<class IN1, class IN2, class OUT1>
+struct binary_func : public detail::base_func_type<OUT1> // Base for two-input test functors: provides type aliases plus default input ranges, special cases and delta.
+    typedef IN1 in1_type;
+    typedef IN2 in2_type;
+    typedef OUT1 out_type;
+    virtual ~binary_func() {};
+    virtual std::string str() = 0; // text spliced into the generated kernel as the called function and used in the pass/fail log lines
+    std::string decl_str() // signature text of the form OUT(IN1, IN2), built with type_name
+    {
+        return type_name<OUT1>() + "(" + type_name<IN1>() + ", " + type_name<IN2>() + ")";
+    }
+    bool is_in1_bool() // when true, generate_kernel_binary passes (input1[gid] != 0) instead of input1[gid]
+    {
+        return false;
+    }
+    bool is_in2_bool() // when true, generate_kernel_binary passes (input2[gid] != 0) instead of input2[gid]
+    {
+        return false;
+    }
+    IN1 min1() // lower bound passed to generate_input for the first input
+    {
+        return detail::get_min<IN1>();
+    }
+    IN1 max1() // upper bound passed to generate_input for the first input
+    {
+        return detail::get_max<IN1>();
+    }
+    IN2 min2() // lower bound passed to generate_input for the second input
+    {
+        return detail::get_min<IN2>();
+    }
+    IN2 max2() // upper bound passed to generate_input for the second input
+    {
+        return detail::get_max<IN2>();
+    }
+    std::vector<IN1> in1_special_cases() // default: no special cases for the first input
+    {
+        return { };
+    }
+    std::vector<IN2> in2_special_cases() // default: no special cases for the second input
+    {
+        return { };
+    }
+    template<class T>
+    typename make_vector_type<cl_double, vector_size<T>::value>::type
+    delta(const IN1& in1, const IN2& in2, const T& expected) // default: 1e-3 times expected, as a cl_double vector with as many components as T; presumably the tolerance given to are_equal — confirm
+    {
+        typedef
+            typename make_vector_type<cl_double, vector_size<T>::value>::type
+            delta_vector_type;
+        // Take care of unused variable warning
+        (void) in1;
+        (void) in2;
+        auto e = detail::make_value<delta_vector_type>(1e-3);
+        return detail::multiply<delta_vector_type>(e, expected);
+    }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+template <class func_type, class in1_type, class in2_type, class out_type>
+std::string generate_kernel_binary(func_type func) // OpenCL C variant: returns the source of a kernel computing output[gid] = func(input1[gid], input2[gid]).
+    std::string in1_value = "input1[gid]";
+    if(func.is_in1_bool()) // bool operand: compare the stored value against an int/intN zero
+    {
+        std::string i = vector_size<in1_type>::value == 1 ? "" : std::to_string(vector_size<in1_type>::value); // width suffix, empty for scalars
+        in1_value = "(input1[gid] != (int" + i + ")(0))";
+    }
+    std::string in2_value = "input2[gid]";
+    if(func.is_in2_bool()) // same treatment for the second operand
+    {
+        std::string i = vector_size<in2_type>::value == 1 ? "" : std::to_string(vector_size<in2_type>::value);
+        in2_value = "(input2[gid] != (int" + i + ")(0))";
+    }
+    std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ")";
+    if(func.is_out_bool()) // bool result: wrap the call in convert_int/convert_intN
+    {
+        std::string i = vector_size<out_type>::value == 1 ? "" : std::to_string(vector_size<out_type>::value);
+        function_call = "convert_int" + i + "(" + func.str() + "(" + in1_value + ", " + in2_value + "))";
+    }
+    return
+        "__kernel void " + func.get_kernel_name() + "(global " + type_name<in1_type>() + " *input1,\n"
+        "                                      global " + type_name<in2_type>() + " *input2,\n"
+        "                                      global " + type_name<out_type>() + " *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    output[gid] = " + function_call + ";\n"
+        "}\n";
+template <class func_type, class in1_type, class in2_type, class out_type>
+std::string generate_kernel_binary(func_type func) // OpenCL C++ variant: func.defs(), headers and a kernel computing output[gid] = func(input1[gid], input2[gid]) on global_ptr arguments.
+    std::string headers = func.headers();
+    std::string in1_value = "input1[gid]";
+    if(func.is_in1_bool()) // bool operand: compare the stored value against an int/intN zero
+    {
+        std::string i = vector_size<in1_type>::value == 1 ? "" : std::to_string(vector_size<in1_type>::value); // width suffix, empty for scalars
+        in1_value = "(input1[gid] != (int" + i + ")(0))";
+    }
+    std::string in2_value = "input2[gid]";
+    if(func.is_in2_bool()) // same treatment for the second operand
+    {
+        std::string i = vector_size<in2_type>::value == 1 ? "" : std::to_string(vector_size<in2_type>::value);
+        in2_value = "(input2[gid] != (int" + i + ")(0))";
+    }
+    std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ")";
+    if(func.is_out_bool()) // bool result: wrap the call in convert_cast<int>/convert_cast<intN>
+    {
+        std::string i = vector_size<out_type>::value == 1 ? "" : std::to_string(vector_size<out_type>::value);
+        function_call = "convert_cast<int" + i + ">(" + func.str() + "(" + in1_value + ", " + in2_value + "))";
+    }
+    if(func.is_out_bool() || func.is_in1_bool() || func.is_in2_bool()) // any bool operand or result: add the <opencl_convert> include unless headers already contains it
+    {
+        if(headers.find("#include <opencl_convert>") == std::string::npos)
+        {
+            headers += "#include <opencl_convert>\n";
+        }
+    }
+    return
+        "" + func.defs() +
+        "" + headers +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void " + func.get_kernel_name() + "(global_ptr<" + type_name<in1_type>() + "[]> input1,\n"
+        "                                      global_ptr<" + type_name<in2_type>() + "[]> input2,\n"
+        "                                      global_ptr<" + type_name<out_type>() + "[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    output[gid] = " + function_call + ";\n"
+        "}\n";
+template<class INPUT1, class INPUT2, class OUTPUT, class binary_op>
+bool verify_binary(const std::vector<INPUT1> &in1, // Compares each out[i] with the host result op(in1[i], in2[i]); prints an error and returns false at the first mismatch, true otherwise.
+                   const std::vector<INPUT2> &in2,
+                   const std::vector<OUTPUT> &out,
+                   binary_op op)
+    for(size_t i = 0; i < in1.size(); i++) // in2 and out are indexed with the same i, so both must be at least as long as in1
+    {
+        auto expected = op(in1[i], in2[i]);
+        if(!are_equal(expected, out[i],[i], in2[i], expected), op)) // NOTE(review): the third argument looks truncated in this hunk — confirm against upstream
+        {
+            print_error_msg(expected, out[i], i, op);
+            return false;
+        }
+    }
+    return true;
+template <class binary_op>
+int test_binary_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, binary_op op) // Builds the kernel for op, runs it over `count` generated input pairs and verifies the device results on the host.
+    cl_mem buffers[3]; // [0] first input, [1] second input, [2] output
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int err;
+    typedef typename binary_op::in1_type INPUT1;
+    typedef typename binary_op::in2_type INPUT2;
+    typedef typename binary_op::out_type OUTPUT;
+    // Don't run test for unsupported types
+    if(!(type_supported<INPUT1>(device)
+         && type_supported<INPUT2>(device)
+         && type_supported<OUTPUT>(device)))
+    {
+        return CL_SUCCESS; // a skipped test is reported as success
+    }
+    std::string code_str = generate_kernel_binary<binary_op, INPUT1, INPUT2, OUTPUT>(op);
+    std::string kernel_name = op.get_kernel_name();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation. NOTE(review): the #if/#else/#endif lines and the error checks that should accompany the three build paths below are not present in this hunk — confirm against upstream.
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    std::vector<INPUT1> in1_spec_cases = op.in1_special_cases();
+    std::vector<INPUT2> in2_spec_cases = op.in2_special_cases();
+    prepare_special_cases(in1_spec_cases, in2_spec_cases);
+    std::vector<INPUT1> input1 = generate_input<INPUT1>(count, op.min1(), op.max1(), in1_spec_cases);
+    std::vector<INPUT2> input2 = generate_input<INPUT2>(count, op.min2(), op.max2(), in2_spec_cases);
+    std::vector<OUTPUT> output = generate_output<OUTPUT>(count);
+    buffers[0] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT1) * input1.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+    buffers[1] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT2) * input2.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+    buffers[2] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(OUTPUT) * output.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(INPUT1) * input1.size(),
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the host pointer expression is missing in this hunk — confirm against upstream
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer")
+    err = clEnqueueWriteBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(INPUT2) * input2.size(),
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): host pointer expression missing here as well
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer")
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); // results are OR-ed together, so a reported value may combine several error codes
+    err |= clSetKernelArg(kernel, 2, sizeof(buffers[2]), &buffers[2]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+    work_size[0] = count; // one work-item per generated input pair
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+    err = clEnqueueReadBuffer(
+        queue, buffers[2], CL_TRUE, 0, sizeof(OUTPUT) * output.size(),
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): host pointer expression missing here as well
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    if (!verify_binary(input1, input2, output, op)) // NOTE(review): the return below leaves the buffers, kernel and program unreleased
+    {
+        RETURN_ON_ERROR_MSG(-1,
+            "test_%s %s(%s, %s) failed", op.str().c_str(),
+            type_name<OUTPUT>().c_str(), type_name<INPUT1>().c_str(), type_name<INPUT2>().c_str()
+        );
+    }
+    log_info(
+        "test_%s %s(%s, %s) passed\n", op.str().c_str(),
+        type_name<OUTPUT>().c_str(), type_name<INPUT1>().c_str(), type_name<INPUT2>().c_str()
+    );
+    clReleaseMemObject(buffers[0]); // only reached on success; the RETURN_ON_* early returns above skip these releases
+    clReleaseMemObject(buffers[1]);
+    clReleaseMemObject(buffers[2]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
diff --git a/test_conformance/clcpp/utils_test/compare.hpp b/test_conformance/clcpp/utils_test/compare.hpp
new file mode 100644
index 0000000..a22b88f
--- /dev/null
+++ b/test_conformance/clcpp/utils_test/compare.hpp
@@ -0,0 +1,161 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <random>
+#include <limits>
+#include <type_traits>
+#include <algorithm>
+#include <cmath>
+#include "../common.hpp"
+// Element-wise equality check for vectors with integral elements.
+//
+// x, y   - vectors to compare (same vector type).
+// delta  - ignored; accepted so every are_equal() overload can be called
+//          with the same argument list.
+// op     - tested function; when op.is_out_bool() is true each element is
+//          compared by truthiness (zero vs. non-zero) instead of by value.
+//
+// Returns true only if every element matches.
+template<class type, class delta_type, class op_type>
+inline bool are_equal(const type& x,
+                      const type& y,
+                      const delta_type& delta,
+                      op_type op,
+                      typename std::enable_if<
+                        is_vector_type<type>::value
+                        && std::is_integral<typename scalar_type<type>::type>::value
+                      >::type* = 0)
+{
+    (void) delta;
+    const size_t lanes = vector_size<type>::value;
+    for(size_t lane = 0; lane < lanes; ++lane)
+    {
+        const bool same = op.is_out_bool()
+            ? ((x.s[lane] != 0) == (y.s[lane] != 0))
+            : (x.s[lane] == y.s[lane]);
+        if(!same)
+        {
+            return false;
+        }
+    }
+    return true;
+}
+// Equality check for scalar integral values.
+//
+// x, y   - values to compare.
+// delta  - ignored; accepted so every are_equal() overload can be called
+//          with the same argument list.
+// op     - tested function; when op.is_out_bool() is true the values stand
+//          for booleans and are compared by truthiness (zero vs. non-zero),
+//          the same rule the integral vector overload applies per element.
+//
+// Returns true when the values match under the selected comparison.
+template<class type, class delta_type, class op_type>
+inline bool are_equal(const type& x,
+                      const type& y,
+                      const delta_type& delta,
+                      op_type op,
+                      typename std::enable_if<
+                        !is_vector_type<type>::value
+                        && std::is_integral<type>::value
+                      >::type* = 0)
+{
+    (void) delta;
+    if(op.is_out_bool())
+    {
+        // Any two non-zero values both mean "true"; do not fall through to
+        // the exact comparison, which would reject e.g. 1 vs. -1.
+        return (x != 0) == (y != 0);
+    }
+    return x == y;
+}
+// Approximate equality check for scalar floating-point values.
+//
+// x      - expected value (converted to the result type before comparing).
+// y      - result value.
+// delta  - allowed absolute error; the effective tolerance is never smaller
+//          than std::numeric_limits<type2>::epsilon().
+// op     - tested function; op.use_ulp() / op.ulp() control the extra ULP
+//          check applied to values that are not bit-for-bit equal.
+//
+// Rules, in order:
+//   * a subnormal on either side is always accepted (we don't care about
+//     subnormal values in OpenCL C++ tests),
+//   * two NaNs are equal, a single NaN is a mismatch,
+//   * an exact match is accepted (this also covers inf and -inf),
+//   * otherwise the absolute error must be within the tolerance and, if
+//     op.use_ulp() is true, the ULP error must not exceed op.ulp().
+template<class type, class type1, class type2, class op_type>
+inline bool are_equal(const type& x,
+                      const type1& y,
+                      const type2& delta,
+                      op_type op,
+                      typename std::enable_if<
+                        !is_vector_type<type>::value
+                        && std::is_floating_point<type>::value
+                      >::type* = 0)
+{
+    const type1 expected = static_cast<type1>(x);
+    if(std::fpclassify(expected) == FP_SUBNORMAL || std::fpclassify(y) == FP_SUBNORMAL)
+    {
+        return true;
+    }
+    const bool expected_is_nan = (std::isnan)(expected);
+    const bool result_is_nan = (std::isnan)(y);
+    if(expected_is_nan || result_is_nan)
+    {
+        // Equal only when both sides are NaN.
+        return expected_is_nan && result_is_nan;
+    }
+    if(expected == y)
+    {
+        return true;
+    }
+    const auto tolerance = (std::max)(std::numeric_limits<type2>::epsilon(), std::abs(delta));
+    if(std::abs(expected - y) > tolerance)
+    {
+        return false;
+    }
+    if(op.use_ulp())
+    {
+        return !(std::abs(Ulp_Error(x, y)) > op.ulp());
+    }
+    return true;
+}
+// Approximate equality check for vectors with floating-point elements.
+//
+// x      - expected vector.
+// y      - result vector.
+// delta  - vector of per-element allowed errors.
+// op     - tested function, forwarded to the scalar comparison.
+//
+// Returns true only if every element pair passes the scalar are_equal().
+template<class type, class type1, class type2, class op_type>
+inline bool are_equal(const type& x,
+                      const type1& y,
+                      const type2& delta,
+                      op_type op,
+                      typename std::enable_if<
+                        is_vector_type<type>::value
+                        && std::is_floating_point<typename scalar_type<type>::type>::value
+                      >::type* = 0)
+{
+    const size_t lanes = vector_size<type>::value;
+    size_t lane = 0;
+    // Stop at the first element that does not compare equal.
+    while(lane < lanes && are_equal(x.s[lane], y.s[lane], delta.s[lane], op))
+    {
+        ++lane;
+    }
+    return lane == lanes;
+}
+// Logs a verification failure for one element.
+//
+// expected - value computed on the host.
+// result   - value the comparison was made against.
+// i        - index of the failing element.
+// op       - tested function; op.str() and op.decl_str() name it in the log.
+template<class type, class type1, class func>
+inline void print_error_msg(const type& expected, const type1& result, size_t i, func op)
+{
+    // %zu is the conversion for size_t; %lu is wrong wherever unsigned long
+    // and size_t differ in width.
+    log_error(
+        "ERROR: test_%s %s failed. Error at %zu: Expected: %s, got: %s\n",
+        op.str().c_str(),
+        op.decl_str().c_str(),
+        i,
+        format_value(expected).c_str(),
+        format_value(result).c_str()
+    );
+}
diff --git a/test_conformance/clcpp/utils_test/detail/base_func_type.hpp b/test_conformance/clcpp/utils_test/detail/base_func_type.hpp
new file mode 100644
index 0000000..92e375d
--- /dev/null
+++ b/test_conformance/clcpp/utils_test/detail/base_func_type.hpp
@@ -0,0 +1,112 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <random>
+#include <limits>
+#include <type_traits>
+#include <algorithm>
+#include <cmath>
+#include "../../common.hpp"
+#include "vec_helpers.hpp"
+namespace detail
+{
+// Base type for descriptions of tested functions. A derived type supplies
+// the function name through str(); everything else has a default here.
+template<class OUT1>
+struct base_func_type
+{
+    virtual ~base_func_type() {}
+    // Name of the tested function.
+    virtual std::string str() = 0;
+    // Name of the test kernel for the function: "test_" followed by str()
+    // with every "::" replaced by "_".
+    virtual std::string get_kernel_name()
+    {
+        std::string name = this->str();
+        replace_all(name, "::", "_");
+        return "test_" + name;
+    }
+    // Defines and pragmas the kernel needs. None by default.
+    virtual std::string defs()
+    {
+        return std::string();
+    }
+    // OpenCL C++ headers the kernel needs. None by default.
+    virtual std::string headers()
+    {
+        return std::string();
+    }
+    // True if the OUT1 type in the OpenCL kernel should be treated as a
+    // bool type; false otherwise. Default: false.
+    bool is_out_bool()
+    {
+        return false;
+    }
+    // Maximum ULP error: an error should be raised when
+    // Ulp_Error(result, expected) > ulp(). Default: 0.
+    float ulp()
+    {
+        return 0.0f;
+    }
+    // Whether the ULP error is checked when verifying a result.
+    //
+    // (This affects how the are_equal() function works; it may have no
+    // effect if the verify() method of a derived class does not use
+    // are_equal().)
+    //
+    // Only for FP numbers/vectors. Default: true.
+    bool use_ulp()
+    {
+        return true;
+    }
+    // Maximum absolute error: an error should be raised if
+    // abs(result - expected) > delta(expected).
+    //
+    // Default value: 0.001 * expected, computed per element in cl_double.
+    //
+    // (This affects how the are_equal() function works; it may have no
+    // effect if the verify() method of a derived class does not use
+    // are_equal().)
+    //
+    // Only for FP numbers/vectors.
+    template<class T>
+    typename make_vector_type<cl_double, vector_size<T>::value>::type
+    delta(const T& expected)
+    {
+        typedef
+            typename make_vector_type<cl_double, vector_size<T>::value>::type
+            delta_vector_type;
+        return detail::multiply<delta_vector_type>(
+            detail::make_value<delta_vector_type>(1e-3), expected
+        );
+    }
+};
+} // detail namespace
diff --git a/test_conformance/clcpp/utils_test/detail/vec_helpers.hpp b/test_conformance/clcpp/utils_test/detail/vec_helpers.hpp
new file mode 100644
index 0000000..05df42a
--- /dev/null
+++ b/test_conformance/clcpp/utils_test/detail/vec_helpers.hpp
@@ -0,0 +1,104 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <random>
+#include <limits>
+#include <type_traits>
+#include <algorithm>
+#include <cmath>
+#include "../../common.hpp"
+namespace detail
+{
+// Builds a vector of type T with every element set to x.
+template<class T>
+T make_value(typename scalar_type<T>::type x, typename std::enable_if<is_vector_type<T>::value>::type* = 0)
+{
+    T filled;
+    const size_t lanes = vector_size<T>::value;
+    for(size_t lane = 0; lane < lanes; ++lane)
+    {
+        filled.s[lane] = x;
+    }
+    return filled;
+}
+// Scalar counterpart of make_value(): returns x unchanged.
+template<class T>
+T make_value(T x, typename std::enable_if<!is_vector_type<T>::value>::type* = 0)
+{
+    return x;
+}
+// Element-wise product of two vectors. Both operands are converted to the
+// scalar type of result_type before multiplying; all three vector sizes
+// must be equal (checked at compile time).
+template<class result_type, class IN1, class IN2>
+result_type multiply(const IN1& x, const IN2& y, typename std::enable_if<is_vector_type<result_type>::value>::type* = 0)
+{
+    static_assert(
+        (vector_size<IN1>::value == vector_size<IN2>::value)
+            && (vector_size<IN2>::value == vector_size<result_type>::value),
+        "Vector sizes must be the same."
+    );
+    typedef typename scalar_type<result_type>::type SCALAR;
+    result_type product;
+    const size_t lanes = vector_size<result_type>::value;
+    for(size_t lane = 0; lane < lanes; ++lane)
+    {
+        const SCALAR lhs = static_cast<SCALAR>(x.s[lane]);
+        const SCALAR rhs = static_cast<SCALAR>(y.s[lane]);
+        product.s[lane] = lhs * rhs;
+    }
+    return product;
+}
+// Product of two scalars, each converted to result_type first.
+template<class result_type, class IN1, class IN2>
+result_type multiply(const IN1& x, const IN2& y, typename std::enable_if<!is_vector_type<result_type>::value>::type* = 0)
+{
+    static_assert(
+        !is_vector_type<IN1>::value && !is_vector_type<IN2>::value,
+        "IN1 and IN2 must be scalar types"
+    );
+    const result_type lhs = static_cast<result_type>(x);
+    const result_type rhs = static_cast<result_type>(y);
+    return lhs * rhs;
+}
+// Value of type T with every element equal to
+// std::numeric_limits<scalar>::min().
+template<class T>
+T get_min()
+{
+    return make_value<T>((std::numeric_limits<typename scalar_type<T>::type>::min)());
+}
+// Value of type T with every element equal to
+// std::numeric_limits<scalar>::max().
+template<class T>
+T get_max()
+{
+    return make_value<T>((std::numeric_limits<typename scalar_type<T>::type>::max)());
+}
+// Value of type T with every element equal to
+// std::numeric_limits<scalar>::max() divided by x.
+template<class T>
+T get_part_max(typename scalar_type<T>::type x)
+{
+    return make_value<T>((std::numeric_limits<typename scalar_type<T>::type>::max)() / x);
+}
+// Value of type T with every element equal to x.
+template<class T>
+T def_limit(typename scalar_type<T>::type x)
+{
+    return make_value<T>(x);
+}
+} // detail namespace
diff --git a/test_conformance/clcpp/utils_test/generate_inputs.hpp b/test_conformance/clcpp/utils_test/generate_inputs.hpp
new file mode 100644
index 0000000..bb0d750
--- /dev/null
+++ b/test_conformance/clcpp/utils_test/generate_inputs.hpp
@@ -0,0 +1,331 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <random>
+#include <limits>
+#include <type_traits>
+#include <algorithm>
+#include <cmath>
+#include "../common.hpp"
+// Generates `count` random input vectors with integral elements.
+//
+// Element j of every vector is drawn from a uniform integer distribution
+// over [min.s[j], max.s[j]]. The special cases are then inserted in front of
+// the random values and the result is cut back to `count` entries.
+//
+// Selected for integral vector types whose scalar type is neither cl_uchar
+// nor cl_char. Per the original note, std::uniform_int_distribution<> does
+// not work with those two types in VS2015, so they have their own overload.
+template <class type>
+std::vector<type> generate_input(size_t count,
+                                 const type& min,
+                                 const type& max,
+                                 const std::vector<type> special_cases,
+                                 typename std::enable_if<
+                                    is_vector_type<type>::value
+                                    && std::is_integral<typename scalar_type<type>::type>::value
+                                    && !(std::is_same<typename scalar_type<type>::type, cl_uchar>::value
+                                         || std::is_same<typename scalar_type<type>::type, cl_char>::value)
+                                 >::type* = 0)
+{
+    typedef typename scalar_type<type>::type SCALAR;
+    typedef std::uniform_int_distribution<SCALAR> distribution_type;
+    const size_t vec_size = vector_size<type>::value;
+    // One distribution per lane, because the bounds are given per lane.
+    std::vector<distribution_type> lane_dists;
+    lane_dists.reserve(vec_size);
+    for(size_t lane = 0; lane < vec_size; ++lane)
+    {
+        lane_dists.push_back(distribution_type(min.s[lane], max.s[lane]));
+    }
+    std::random_device seed_source;
+    std::mt19937 engine(seed_source());
+    std::vector<type> input(count);
+    for(size_t n = 0; n < input.size(); ++n)
+    {
+        for(size_t lane = 0; lane < vec_size; ++lane)
+        {
+            input[n].s[lane] = lane_dists[lane](engine);
+        }
+    }
+    // Special cases go first; resize() drops whatever no longer fits.
+    input.insert(input.begin(), special_cases.begin(), special_cases.end());
+    input.resize(count);
+    return input;
+}
+// Generates `count` random input vectors with cl_uchar or cl_char elements.
+//
+// Element j of every vector is drawn as a cl_int from a uniform integer
+// distribution over [min.s[j], max.s[j]] and converted back to the scalar
+// type. Sampling goes through cl_int because, per the original note,
+// std::uniform_int_distribution<> does not work with cl_uchar and cl_char
+// in VS2015. The special cases are then inserted in front of the random
+// values and the result is cut back to `count` entries.
+template <class type>
+std::vector<type> generate_input(size_t count,
+                                 const type& min,
+                                 const type& max,
+                                 const std::vector<type> special_cases,
+                                 typename std::enable_if<
+                                    is_vector_type<type>::value
+                                    && std::is_integral<typename scalar_type<type>::type>::value
+                                    && (std::is_same<typename scalar_type<type>::type, cl_uchar>::value
+                                        || std::is_same<typename scalar_type<type>::type, cl_char>::value)
+                                 >::type* = 0)
+{
+    typedef typename scalar_type<type>::type SCALAR;
+    typedef std::uniform_int_distribution<cl_int> distribution_type;
+    const size_t vec_size = vector_size<type>::value;
+    // One distribution per lane, because the bounds are given per lane.
+    std::vector<distribution_type> lane_dists;
+    lane_dists.reserve(vec_size);
+    for(size_t lane = 0; lane < vec_size; ++lane)
+    {
+        const cl_int low = static_cast<cl_int>(min.s[lane]);
+        const cl_int high = static_cast<cl_int>(max.s[lane]);
+        lane_dists.push_back(distribution_type(low, high));
+    }
+    std::random_device seed_source;
+    std::mt19937 engine(seed_source());
+    std::vector<type> input(count);
+    for(size_t n = 0; n < input.size(); ++n)
+    {
+        for(size_t lane = 0; lane < vec_size; ++lane)
+        {
+            input[n].s[lane] = static_cast<SCALAR>(lane_dists[lane](engine));
+        }
+    }
+    // Special cases go first; resize() drops whatever no longer fits.
+    input.insert(input.begin(), special_cases.begin(), special_cases.end());
+    input.resize(count);
+    return input;
+}
+// Generates `count` random scalar integral inputs.
+//
+// Every value is drawn from a uniform integer distribution over [min, max].
+// The special cases are then inserted in front of the random values and the
+// result is cut back to `count` entries.
+//
+// Selected for integral scalar types other than cl_uchar and cl_char. Per
+// the original note, std::uniform_int_distribution<> does not work with
+// those two types in VS2015, so they have their own overload.
+template <class type>
+std::vector<type> generate_input(size_t count,
+                                 const type& min,
+                                 const type& max,
+                                 const std::vector<type> special_cases,
+                                 typename std::enable_if<
+                                    !is_vector_type<type>::value
+                                    && std::is_integral<type>::value
+                                    && !(std::is_same<type, cl_uchar>::value || std::is_same<type, cl_char>::value)
+                                 >::type* = 0)
+{
+    std::random_device seed_source;
+    std::mt19937 engine(seed_source());
+    std::uniform_int_distribution<type> pick(min, max);
+    std::vector<type> input(count);
+    for(size_t n = 0; n < input.size(); ++n)
+    {
+        input[n] = pick(engine);
+    }
+    // Special cases go first; resize() drops whatever no longer fits.
+    input.insert(input.begin(), special_cases.begin(), special_cases.end());
+    input.resize(count);
+    return input;
+}
+// Generates `count` random cl_uchar or cl_char inputs.
+//
+// Every value is drawn as a cl_int from a uniform integer distribution over
+// [min, max] and converted back to `type`. Sampling goes through cl_int
+// because, per the original note, std::uniform_int_distribution<> does not
+// work with cl_uchar and cl_char in VS2015. The special cases are then
+// inserted in front of the random values and the result is cut back to
+// `count` entries.
+template <class type>
+std::vector<type> generate_input(size_t count,
+                                 const type& min,
+                                 const type& max,
+                                 const std::vector<type> special_cases,
+                                 typename std::enable_if<
+                                    !is_vector_type<type>::value
+                                    && std::is_integral<type>::value
+                                    && (std::is_same<type, cl_uchar>::value || std::is_same<type, cl_char>::value)
+                                 >::type* = 0)
+{
+    std::random_device seed_source;
+    std::mt19937 engine(seed_source());
+    const cl_int low = static_cast<cl_int>(min);
+    const cl_int high = static_cast<cl_int>(max);
+    std::uniform_int_distribution<cl_int> pick(low, high);
+    std::vector<type> input(count);
+    for(size_t n = 0; n < input.size(); ++n)
+    {
+        input[n] = static_cast<type>(pick(engine));
+    }
+    // Special cases go first; resize() drops whatever no longer fits.
+    input.insert(input.begin(), special_cases.begin(), special_cases.end());
+    input.resize(count);
+    return input;
+}
+// Generates `count` random input vectors with floating-point elements.
+//
+// Element j of every vector is drawn from a uniform real distribution built
+// from min.s[j] and max.s[j]; subnormal draws are discarded and redrawn.
+// A subnormal bound is reported through log_error(), but generation still
+// continues. The special cases are then inserted in front of the random
+// values and the result is cut back to `count` entries.
+template <class type>
+std::vector<type> generate_input(size_t count,
+                                 const type& min,
+                                 const type& max,
+                                 const std::vector<type> special_cases,
+                                 typename std::enable_if<
+                                    is_vector_type<type>::value
+                                    && std::is_floating_point<typename scalar_type<type>::type>::value
+                                 >::type* = 0)
+{
+    typedef typename scalar_type<type>::type SCALAR;
+    typedef std::uniform_real_distribution<SCALAR> distribution_type;
+    const size_t vec_size = vector_size<type>::value;
+    // One distribution per lane, because the bounds are given per lane.
+    std::vector<distribution_type> lane_dists;
+    lane_dists.reserve(vec_size);
+    for(size_t lane = 0; lane < vec_size; ++lane)
+    {
+        const bool subnormal_bound =
+            std::fpclassify(max.s[lane]) == FP_SUBNORMAL
+            || std::fpclassify(min.s[lane]) == FP_SUBNORMAL;
+        if(subnormal_bound)
+        {
+            log_error("ERROR: min and max value for input generation CAN NOT BE subnormal\n");
+        }
+        lane_dists.push_back(distribution_type(min.s[lane], max.s[lane]));
+    }
+    std::random_device seed_source;
+    std::mt19937 engine(seed_source());
+    std::vector<type> input(count);
+    for(size_t n = 0; n < input.size(); ++n)
+    {
+        for(size_t lane = 0; lane < vec_size; ++lane)
+        {
+            SCALAR drawn;
+            // Redraw until the value is not subnormal.
+            do
+            {
+                drawn = lane_dists[lane](engine);
+            }
+            while(std::fpclassify(drawn) == FP_SUBNORMAL);
+            input[n].s[lane] = drawn;
+        }
+    }
+    // Special cases go first; resize() drops whatever no longer fits.
+    input.insert(input.begin(), special_cases.begin(), special_cases.end());
+    input.resize(count);
+    return input;
+}
+// Generates `count` random scalar floating-point inputs.
+//
+// Every value is drawn from a uniform real distribution built from min and
+// max; subnormal draws are discarded and redrawn. A subnormal bound is
+// reported through log_error(), but generation still continues. The special
+// cases are then inserted in front of the random values and the result is
+// cut back to `count` entries.
+template <class type>
+std::vector<type> generate_input(size_t count,
+                                 const type& min,
+                                 const type& max,
+                                 const std::vector<type> special_cases,
+                                 typename std::enable_if<
+                                    !is_vector_type<type>::value
+                                    && std::is_floating_point<type>::value
+                                 >::type* = 0)
+{
+    const bool subnormal_bound =
+        std::fpclassify(max) == FP_SUBNORMAL || std::fpclassify(min) == FP_SUBNORMAL;
+    if(subnormal_bound)
+    {
+        log_error("ERROR: min and max value for input generation CAN NOT BE subnormal\n");
+    }
+    std::random_device seed_source;
+    std::mt19937 engine(seed_source());
+    std::uniform_real_distribution<type> pick(min, max);
+    std::vector<type> input(count);
+    for(size_t n = 0; n < input.size(); ++n)
+    {
+        type drawn;
+        // Redraw until the value is not subnormal.
+        do
+        {
+            drawn = pick(engine);
+        }
+        while(std::fpclassify(drawn) == FP_SUBNORMAL);
+        input[n] = drawn;
+    }
+    // Special cases go first; resize() drops whatever no longer fits.
+    input.insert(input.begin(), special_cases.begin(), special_cases.end());
+    input.resize(count);
+    return input;
+}
+// Creates an output buffer of `count` vectors, every element of every
+// vector set to `svalue` (0 by default).
+template <class type>
+std::vector<type> generate_output(size_t count,
+                                  typename scalar_type<type>::type svalue = typename scalar_type<type>::type(0),
+                                  typename std::enable_if<is_vector_type<type>::value>::type* = 0)
+{
+    type fill;
+    const size_t lanes = vector_size<type>::value;
+    for(size_t lane = 0; lane < lanes; ++lane)
+    {
+        fill.s[lane] = svalue;
+    }
+    std::vector<type> output(count, fill);
+    return output;
+}
+// Creates an output buffer of `count` scalars, each set to `svalue`
+// (0 by default).
+template <class type>
+std::vector<type> generate_output(size_t count,
+                                  type svalue = type(0),
+                                  typename std::enable_if<!is_vector_type<type>::value>::type* = 0)
+{
+    std::vector<type> output(count, svalue);
+    return output;
+}
+// Expands two lists of special cases into their Cartesian product, in
+// place. Afterwards both lists have size1 * size2 entries and position
+// (i * size2 + j) holds the i-th original value of the first list paired
+// with the j-th original value of the second.
+//
+// If either list is empty, both are left untouched.
+template<class T, class K>
+void prepare_special_cases(std::vector<T>& in1_spec_cases, std::vector<K>& in2_spec_cases)
+{
+    const size_t n1 = in1_spec_cases.size();
+    const size_t n2 = in2_spec_cases.size();
+    if(n1 == 0 || n2 == 0)
+    {
+        return;
+    }
+    std::vector<T> crossed1;
+    std::vector<K> crossed2;
+    crossed1.reserve(n1 * n2);
+    crossed2.reserve(n1 * n2);
+    // Appending in (i, j) order yields index i * n2 + j for each pair.
+    for(size_t i = 0; i < n1; ++i)
+    {
+        for(size_t j = 0; j < n2; ++j)
+        {
+            crossed1.push_back(in1_spec_cases[i]);
+            crossed2.push_back(in2_spec_cases[j]);
+        }
+    }
+    in1_spec_cases.swap(crossed1);
+    in2_spec_cases.swap(crossed2);
+}
+// Expands three lists of special cases into their Cartesian product, in
+// place. Afterwards all lists have size1 * size2 * size3 entries and
+// position (i * size2 * size3 + j * size3 + k) holds the (i, j, k)
+// combination of the original values.
+//
+// If one list is empty, only the other two are combined (checked in the
+// order third, second, first) and the empty one stays empty.
+template<class T, class K, class M>
+void prepare_special_cases(std::vector<T>& in1_spec_cases,
+                           std::vector<K>& in2_spec_cases,
+                           std::vector<M>& in3_spec_cases)
+{
+    if(in3_spec_cases.empty())
+    {
+        prepare_special_cases(in1_spec_cases, in2_spec_cases);
+        return;
+    }
+    if(in2_spec_cases.empty())
+    {
+        prepare_special_cases(in1_spec_cases, in3_spec_cases);
+        return;
+    }
+    if(in1_spec_cases.empty())
+    {
+        prepare_special_cases(in2_spec_cases, in3_spec_cases);
+        return;
+    }
+    const size_t n1 = in1_spec_cases.size();
+    const size_t n2 = in2_spec_cases.size();
+    const size_t n3 = in3_spec_cases.size();
+    const size_t total = n1 * n2 * n3;
+    std::vector<T> crossed1;
+    std::vector<K> crossed2;
+    std::vector<M> crossed3;
+    crossed1.reserve(total);
+    crossed2.reserve(total);
+    crossed3.reserve(total);
+    // Appending in (i, j, k) order yields index i * n2 * n3 + j * n3 + k.
+    for(size_t i = 0; i < n1; ++i)
+    {
+        for(size_t j = 0; j < n2; ++j)
+        {
+            for(size_t k = 0; k < n3; ++k)
+            {
+                crossed1.push_back(in1_spec_cases[i]);
+                crossed2.push_back(in2_spec_cases[j]);
+                crossed3.push_back(in3_spec_cases[k]);
+            }
+        }
+    }
+    in1_spec_cases.swap(crossed1);
+    in2_spec_cases.swap(crossed2);
+    in3_spec_cases.swap(crossed3);
+}
diff --git a/test_conformance/clcpp/utils_test/ternary.hpp b/test_conformance/clcpp/utils_test/ternary.hpp
new file mode 100644
index 0000000..342681e
--- /dev/null
+++ b/test_conformance/clcpp/utils_test/ternary.hpp
@@ -0,0 +1,368 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include <algorithm>
+#include <string>
+#include <cmath>
+#include "../common.hpp"
+#include "detail/base_func_type.hpp"
+#include "generate_inputs.hpp"
+#include "compare.hpp"
+// Description of a tested function that takes three arguments
+// (IN1, IN2, IN3) and returns OUT1. The members below are defaults; the
+// function name itself must be supplied through str().
+template<class IN1, class IN2, class IN3, class OUT1>
+struct ternary_func : public detail::base_func_type<OUT1>
+{
+    typedef IN1 in1_type;
+    typedef IN2 in2_type;
+    typedef IN3 in3_type;
+    typedef OUT1 out_type;
+    virtual ~ternary_func() {}
+    // Name of the tested function.
+    virtual std::string str() = 0;
+    // Signature of the tested function as text: "OUT1(IN1, IN2, IN3)".
+    std::string decl_str()
+    {
+        std::string decl = type_name<OUT1>();
+        decl += "(";
+        decl += type_name<IN1>();
+        decl += ", ";
+        decl += type_name<IN2>();
+        decl += ", ";
+        decl += type_name<IN3>();
+        decl += ")";
+        return decl;
+    }
+    // Whether the first/second/third argument should be treated as a bool
+    // in the kernel. Default: false for all three.
+    bool is_in1_bool()
+    {
+        return false;
+    }
+    bool is_in2_bool()
+    {
+        return false;
+    }
+    bool is_in3_bool()
+    {
+        return false;
+    }
+    // Bounds for the generated inputs. Default: detail::get_min() and
+    // detail::get_max() of the respective argument type.
+    IN1 min1()
+    {
+        return detail::get_min<IN1>();
+    }
+    IN1 max1()
+    {
+        return detail::get_max<IN1>();
+    }
+    IN2 min2()
+    {
+        return detail::get_min<IN2>();
+    }
+    IN2 max2()
+    {
+        return detail::get_max<IN2>();
+    }
+    IN3 min3()
+    {
+        return detail::get_min<IN3>();
+    }
+    IN3 max3()
+    {
+        return detail::get_max<IN3>();
+    }
+    // Special input values for each argument. Default: none.
+    std::vector<IN1> in1_special_cases()
+    {
+        return std::vector<IN1>();
+    }
+    std::vector<IN2> in2_special_cases()
+    {
+        return std::vector<IN2>();
+    }
+    std::vector<IN3> in3_special_cases()
+    {
+        return std::vector<IN3>();
+    }
+    // Maximum absolute error allowed for `expected`.
+    // Default: 0.001 * expected, computed per element in cl_double; the
+    // inputs are accepted but not used.
+    template<class T>
+    typename make_vector_type<cl_double, vector_size<T>::value>::type
+    delta(const IN1& in1, const IN2& in2, const IN3& in3, const T& expected)
+    {
+        typedef
+            typename make_vector_type<cl_double, vector_size<T>::value>::type
+            delta_vector_type;
+        // Silence unused-parameter warnings.
+        (void) in1;
+        (void) in2;
+        (void) in3;
+        return detail::multiply<delta_vector_type>(
+            detail::make_value<delta_vector_type>(1e-3), expected
+        );
+    }
+};
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Builds the OpenCL C source of the test kernel for a ternary function.
+//
+// The kernel reads input1/input2/input3 at the work-item's global id, calls
+// func.str() on them and stores the result in output. An input flagged by
+// func.is_inN_bool() is passed as "(inputN[gid] != (intK)(0))"; a result
+// flagged by func.is_out_bool() is wrapped in convert_intK(), where K is
+// the vector size (omitted for scalars).
+template <class func_type, class in1_type, class in2_type, class in3_type, class out_type>
+std::string generate_kernel_ternary(func_type func)
+{
+    std::string in1_value = "input1[gid]";
+    if(func.is_in1_bool())
+    {
+        std::string i = vector_size<in1_type>::value == 1 ? "" : std::to_string(vector_size<in1_type>::value);
+        in1_value = "(input1[gid] != (int" + i + ")(0))";
+    }
+    std::string in2_value = "input2[gid]";
+    if(func.is_in2_bool())
+    {
+        std::string i = vector_size<in2_type>::value == 1 ? "" : std::to_string(vector_size<in2_type>::value);
+        in2_value = "(input2[gid] != (int" + i + ")(0))";
+    }
+    std::string in3_value = "input3[gid]";
+    if(func.is_in3_bool())
+    {
+        std::string i = vector_size<in3_type>::value == 1 ? "" : std::to_string(vector_size<in3_type>::value);
+        in3_value = "(input3[gid] != (int" + i + ")(0))";
+    }
+    std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ", " + in3_value + ")";
+    if(func.is_out_bool())
+    {
+        std::string i = vector_size<out_type>::value == 1 ? "" : std::to_string(vector_size<out_type>::value);
+        function_call = "convert_int" + i + "(" + func.str() + "(" + in1_value + ", " + in2_value + ", " + in3_value + "))";
+    }
+    return
+        "__kernel void " + func.get_kernel_name() + "(global " + type_name<in1_type>() + " *input1,\n"
+        "                                      global " + type_name<in2_type>() + " *input2,\n"
+        "                                      global " + type_name<in3_type>() + " *input3,\n"
+        "                                      global " + type_name<out_type>() + " *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    output[gid] = " + function_call + ";\n"
+        "}\n";
+}
+#else
+// Builds the OpenCL C++ source of the test kernel for a ternary function.
+//
+// The source starts with func.defs() and func.headers(), followed by the
+// <opencl_memory> and <opencl_work_item> includes. The kernel reads
+// input1/input2/input3 at the work-item's global id, calls func.str() on
+// them and stores the result in output. An input flagged by
+// func.is_inN_bool() is passed as "(inputN[gid] != (intK)(0))"; a result
+// flagged by func.is_out_bool() is wrapped in convert_cast<intK>(), where K
+// is the vector size (omitted for scalars). When any bool flag is set,
+// <opencl_convert> is added to the headers unless already present.
+template <class func_type, class in1_type, class in2_type, class in3_type, class out_type>
+std::string generate_kernel_ternary(func_type func)
+{
+    std::string headers = func.headers();
+    std::string in1_value = "input1[gid]";
+    if(func.is_in1_bool())
+    {
+        std::string i = vector_size<in1_type>::value == 1 ? "" : std::to_string(vector_size<in1_type>::value);
+        in1_value = "(input1[gid] != (int" + i + ")(0))";
+    }
+    std::string in2_value = "input2[gid]";
+    if(func.is_in2_bool())
+    {
+        std::string i = vector_size<in2_type>::value == 1 ? "" : std::to_string(vector_size<in2_type>::value);
+        in2_value = "(input2[gid] != (int" + i + ")(0))";
+    }
+    std::string in3_value = "input3[gid]";
+    if(func.is_in3_bool())
+    {
+        std::string i = vector_size<in3_type>::value == 1 ? "" : std::to_string(vector_size<in3_type>::value);
+        in3_value = "(input3[gid] != (int" + i + ")(0))";
+    }
+    std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ", " + in3_value + ")";
+    if(func.is_out_bool())
+    {
+        std::string i = vector_size<out_type>::value == 1 ? "" : std::to_string(vector_size<out_type>::value);
+        function_call = "convert_cast<int" + i + ">(" + func.str() + "(" + in1_value + ", " + in2_value + ", " + in3_value + "))";
+    }
+    if(func.is_out_bool() || func.is_in1_bool() || func.is_in2_bool() || func.is_in3_bool())
+    {
+        if(headers.find("#include <opencl_convert>") == std::string::npos)
+        {
+            headers += "#include <opencl_convert>\n";
+        }
+    }
+    return
+        "" + func.defs() +
+        "" + headers +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void " + func.get_kernel_name() + "(global_ptr<" + type_name<in1_type>() + "[]> input1,\n"
+        "                                      global_ptr<" + type_name<in2_type>() + "[]> input2,\n"
+        "                                      global_ptr<" + type_name<in3_type>() + "[]> input3,\n"
+        "                                      global_ptr<" + type_name<out_type>() + "[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    output[gid] = " + function_call + ";\n"
+        "}\n";
+}
+#endif
+// Checks kernel results of a ternary function against host results.
+//
+// in1, in2, in3 - inputs that were passed to the kernel.
+// out           - results to check.
+// op            - tested function; op(a, b, c) computes the expected value
+//                 and op.delta(a, b, c, expected) the allowed error.
+//
+// Iterates over in1.size() entries. On the first mismatch the failure is
+// logged through print_error_msg() and false is returned; true means every
+// entry matched.
+template<class INPUT1, class INPUT2, class INPUT3, class OUTPUT, class ternary_op>
+bool verify_ternary(const std::vector<INPUT1> &in1,
+                    const std::vector<INPUT2> &in2,
+                    const std::vector<INPUT3> &in3,
+                    const std::vector<OUTPUT> &out,
+                    ternary_op op)
+{
+    for(size_t i = 0; i < in1.size(); i++)
+    {
+        auto expected = op(in1[i], in2[i], in3[i]);
+        if(!are_equal(expected, out[i], op.delta(in1[i], in2[i], in3[i], expected), op))
+        {
+            print_error_msg(expected, out[i], i, op);
+            return false;
+        }
+    }
+    return true;
+}
+template <class ternary_op> // Runs one ternary-function test: build kernel, upload inputs, execute, read back, verify.
+int test_ternary_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, ternary_op op)
+    cl_mem buffers[4]; // [0..2] = the three inputs, [3] = output (see the clSetKernelArg calls below)
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int err;
+    typedef typename ternary_op::in1_type INPUT1;
+    typedef typename ternary_op::in2_type INPUT2;
+    typedef typename ternary_op::in3_type INPUT3;
+    typedef typename ternary_op::out_type OUTPUT;
+    // Don't run test for unsupported types
+    if(!(type_supported<INPUT1>(device)
+         && type_supported<INPUT2>(device)
+         && type_supported<INPUT3>(device)
+         && type_supported<OUTPUT>(device)))
+    {
+        return CL_SUCCESS; // an unsupported type combination is reported as success, not as a failure
+    }
+    std::string code_str = generate_kernel_ternary<ternary_op, INPUT1, INPUT2, INPUT3, OUTPUT>(op);
+    std::string kernel_name = op.get_kernel_name();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation (NOTE(review): no opening #if / #else / #endif for these branches is visible in this patch text)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    std::vector<INPUT1> in1_spec_cases = op.in1_special_cases();
+    std::vector<INPUT2> in2_spec_cases = op.in2_special_cases();
+    std::vector<INPUT3> in3_spec_cases = op.in3_special_cases();
+    prepare_special_cases(in1_spec_cases, in2_spec_cases, in3_spec_cases);
+    std::vector<INPUT1> input1 = generate_input<INPUT1>(count, op.min1(), op.max1(), in1_spec_cases);
+    std::vector<INPUT2> input2 = generate_input<INPUT2>(count, op.min2(), op.max2(), in2_spec_cases);
+    std::vector<INPUT3> input3 = generate_input<INPUT3>(count, op.min3(), op.max3(), in3_spec_cases);
+    std::vector<OUTPUT> output = generate_output<OUTPUT>(count);
+    buffers[0] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT1) * input1.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+    buffers[1] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT2) * input2.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+    buffers[2] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT3) * input3.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+    buffers[3] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(OUTPUT) * output.size(), NULL, &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(INPUT1) * input1.size(), // CL_TRUE: blocking write
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the cast operand is missing in this patch text - confirm against upstream
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+    err = clEnqueueWriteBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(INPUT2) * input2.size(),
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+    err = clEnqueueWriteBuffer(
+        queue, buffers[2], CL_TRUE, 0, sizeof(INPUT3) * input3.size(),
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); // statuses are OR-ed so a single check covers all four calls
+    err |= clSetKernelArg(kernel, 2, sizeof(buffers[2]), &buffers[2]);
+    err |= clSetKernelArg(kernel, 3, sizeof(buffers[3]), &buffers[3]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+    work_size[0] = count; // 1-D global size: one work-item per generated element
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+    err = clEnqueueReadBuffer(
+        queue, buffers[3], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), // CL_TRUE: blocking read
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    if (!verify_ternary(input1, input2, input3, output, op))
+    {
+        RETURN_ON_ERROR_MSG(-1, // NOTE(review): if the RETURN_ON_* macros return early (definitions not visible here), the clRelease* calls below are skipped - confirm
+            "test_%s %s(%s, %s, %s) failed", op.str().c_str(),
+            type_name<OUTPUT>().c_str(),
+            type_name<INPUT1>().c_str(),
+            type_name<INPUT2>().c_str(),
+            type_name<INPUT3>().c_str()
+        );
+    }
+    log_info(
+        "test_%s %s(%s, %s, %s) passed\n", op.str().c_str(),
+        type_name<OUTPUT>().c_str(),
+        type_name<INPUT1>().c_str(),
+        type_name<INPUT2>().c_str(),
+        type_name<INPUT3>().c_str()
+    );
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseMemObject(buffers[2]);
+    clReleaseMemObject(buffers[3]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err; // last assigned from clEnqueueReadBuffer above
diff --git a/test_conformance/clcpp/utils_test/unary.hpp b/test_conformance/clcpp/utils_test/unary.hpp
new file mode 100644
index 0000000..2dbc647
--- /dev/null
+++ b/test_conformance/clcpp/utils_test/unary.hpp
@@ -0,0 +1,261 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include <algorithm>
+#include <string>
+#include <cmath>
+#include "../common.hpp"
+#include "detail/base_func_type.hpp"
+#include "generate_inputs.hpp"
+#include "compare.hpp"
+template<class IN1, class OUT1> // IN1: host input element type, OUT1: host output element type
+struct unary_func : public detail::base_func_type<OUT1> // Base for one-input test functors; derived types supply str()
+    typedef IN1 in_type;
+    typedef OUT1 out_type;
+    virtual ~unary_func() {};
+    virtual std::string str() = 0; // the kernel generators in this file paste str() in as the called function's name
+    // Return string with function type, for example: int(float).
+    std::string decl_str()
+    {
+        return type_name<OUT1>() + "(" + type_name<IN1>() + ")";
+    }
+    // Return true if IN1 type in OpenCL kernel should be treated
+    // as bool type; false otherwise.
+    bool is_in1_bool()
+    {
+        return false; // default: the input is not treated as bool
+    }
+    // Return min value that can be used as a first argument.
+    IN1 min1()
+    {
+        return detail::get_min<IN1>();
+    }
+    // Return max value that can be used as a first argument.
+    IN1 max1()
+    {
+        return detail::get_max<IN1>();
+    }
+    // This returns a list of special-case input values we want to
+    // test.
+    std::vector<IN1> in_special_cases()
+    {
+        return { }; // default: no special cases
+    }
+    // Max error. Error should be raised if
+    // abs(result - expected) > delta(.., expected)
+    //
+    // Default value: 0.001 * expected
+    //
+    // (This affects how the are_equal() function works;
+    // it may have no effect if the verify() method in a derived
+    // class does not use the are_equal() function.)
+    //
+    // Only for FP numbers/vectors
+    template<class T>
+    typename make_vector_type<cl_double, vector_size<T>::value>::type
+    delta(const IN1& in1, const T& expected)
+    {
+        typedef
+            typename make_vector_type<cl_double, vector_size<T>::value>::type
+            delta_vector_type; // cl_double vector with as many lanes as T
+        // Take care of unused variable warning
+        (void) in1;
+        auto e = detail::make_value<delta_vector_type>(1e-3); // the 0.001 factor documented above
+        return detail::multiply<delta_vector_type>(e, expected);
+    }
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+template <class func_type, class in_type, class out_type> // OpenCL C variant (sits under the DEVELOPMENT && USE_OPENCLC_KERNELS #if above)
+std::string generate_kernel_unary(func_type func) // Returns kernel source that stores func.str()(input[gid]) into output[gid]
+    std::string in1_value = "input[gid]";
+    // Convert uintN to boolN values
+    if(func.is_in1_bool())
+    {
+        std::string i = vector_size<in_type>::value == 1 ? "" : std::to_string(vector_size<in_type>::value); // "" for scalars, otherwise the lane count
+        in1_value = "(input[gid] != (int" + i + ")(0))";
+    }
+    std::string function_call = func.str() + "(" + in1_value + ");"; // NOTE(review): already ends in ';' and the template below appends another ';'
+    // Convert boolN result of function func_type to uintN
+    if(func.is_out_bool())
+    {
+        std::string i = vector_size<out_type>::value == 1 ? "" : std::to_string(vector_size<out_type>::value);
+        function_call = "convert_int" + i + "(" + func.str() + "(" + in1_value + "))"; // replaces the call string built above
+    }
+    return
+        "__kernel void " + func.get_kernel_name() + "(global " + type_name<in_type>() + " *input, global " + type_name<out_type>() + " *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    output[gid] = " + function_call + ";\n"
+        "}\n";
+template <class func_type, class in_type, class out_type> // OpenCL C++ variant of the generator
+std::string generate_kernel_unary(func_type func) // Returns kernel source using global_ptr<T[]> arguments, prefixed by func.defs() and headers
+    std::string headers = func.headers();
+    std::string in1_value = "input[gid]";
+    if(func.is_in1_bool())
+    {
+        std::string i = vector_size<in_type>::value == 1 ? "" : std::to_string(vector_size<in_type>::value); // "" for scalars, otherwise the lane count
+        in1_value = "(input[gid] != (int" + i + ")(0))"; // compare with zero so the argument has bool type
+    }
+    std::string function_call = func.str() + "(" + in1_value + ")";
+    if(func.is_out_bool())
+    {
+        std::string i = vector_size<out_type>::value == 1 ? "" : std::to_string(vector_size<out_type>::value);
+        function_call = "convert_cast<int" + i + ">(" + func.str() + "(" + in1_value + "))"; // bool result is converted to intN before the store
+    }
+    if(func.is_out_bool() || func.is_in1_bool())
+    {
+        if(headers.find("#include <opencl_convert>") == std::string::npos) // add the include only if func.headers() did not already provide it
+        {
+            headers += "#include <opencl_convert>\n";
+        }
+    }
+    return
+        "" + func.defs() +
+        "" + headers +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void " + func.get_kernel_name() + "(global_ptr<" + type_name<in_type>() +  "[]> input,"
+                                              "global_ptr<" + type_name<out_type>() + "[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    output[gid] = " + function_call + ";\n"
+        "}\n";
+template<class INPUT, class OUTPUT, class unary_op> // Host-side check of device results for a one-input function.
+bool verify_unary(const std::vector<INPUT> &in, const std::vector<OUTPUT> &out, unary_op op) // op(x) computes the expected value on the host
+    for(size_t i = 0; i < in.size(); i++) // out is indexed with the same i as in
+    {
+        auto expected = op(in[i]);
+        if(!are_equal(expected, out[i],[i], expected), op)) // NOTE(review): argument list looks truncated in this patch text - confirm against upstream
+        {
+            print_error_msg(expected, out[i], i, op);
+            return false; // stop at the first mismatch
+        }
+    }
+    return true; // every element matched (also the result when in is empty)
+template <class unary_op> // Runs one unary-function test: build kernel, upload input, execute, read back, verify.
+int test_unary_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, unary_op op)
+    cl_mem buffers[2]; // [0] = input, [1] = output (see the clSetKernelArg calls below)
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int err;
+    typedef typename unary_op::in_type INPUT;
+    typedef typename unary_op::out_type OUTPUT;
+    // Don't run test for unsupported types
+    if(!(type_supported<INPUT>(device) && type_supported<OUTPUT>(device)))
+    {
+        return CL_SUCCESS; // an unsupported type combination is reported as success, not as a failure
+    }
+    std::string code_str = generate_kernel_unary<unary_op, INPUT, OUTPUT>(op);
+    std::string kernel_name = op.get_kernel_name();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation (NOTE(review): no opening #if / #else / #endif for these branches is visible in this patch text)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    std::vector<INPUT> input = generate_input<INPUT>(count, op.min1(), op.max1(), op.in_special_cases());
+    std::vector<OUTPUT> output = generate_output<OUTPUT>(count);
+    buffers[0] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT) * input.size(), NULL,  &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+    buffers[1] = clCreateBuffer(
+        context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(OUTPUT) * output.size(), NULL,  &err
+    );
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer")
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(), // CL_TRUE: blocking write
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the cast operand is missing in this patch text - confirm against upstream
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); // statuses are OR-ed so a single check covers both calls
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+    work_size[0] = count; // 1-D global size: one work-item per generated element
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), // CL_TRUE: blocking read
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    if (!verify_unary(input, output, op))
+    {
+        RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str()); // NOTE(review): if this macro returns early (definition not visible here), the clRelease* calls below are skipped - confirm
+    }
+    log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err; // last assigned from clEnqueueReadBuffer above
diff --git a/test_conformance/clcpp/vload_vstore/CMakeLists.txt b/test_conformance/clcpp/vload_vstore/CMakeLists.txt
new file mode 100644
index 0000000..c66cb6f
--- /dev/null
+++ b/test_conformance/clcpp/vload_vstore/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/vload_vstore/common.hpp b/test_conformance/clcpp/vload_vstore/common.hpp
new file mode 100644
index 0000000..84cd539
--- /dev/null
+++ b/test_conformance/clcpp/vload_vstore/common.hpp
@@ -0,0 +1,81 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <type_traits>
+#include <cmath>
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+#include "half_utils.hpp"
+// Generates cl_half input: count random floats from [min, max), each converted with float2half_rte(); special cases are placed first.
+std::vector<cl_half> generate_half_input(size_t count,
+                                         const cl_float& min,
+                                         const cl_float& max,
+                                         const std::vector<cl_half> special_cases) // taken by value (copied on every call)
+    std::vector<cl_half> input(count);
+    std::random_device rd; // seed source; presumably makes the inputs differ from run to run
+    std::mt19937 gen(rd());
+    std::uniform_real_distribution<cl_float> dis(min, max);
+    for(auto& i : input)
+    {
+        i = float2half_rte(dis(gen));
+    }
+    input.insert(input.begin(), special_cases.begin(), special_cases.end()); // special cases go in front of the random values
+    input.resize(count); // trim back to count; only the first count special cases survive if there are more
+    return input;
+// Generates input for vload_vstore tests. We can't simply use generate_input<type>(...) here
+// because cl_half is a typedef of an integer type (NOTE(review): the original comment said cl_short - confirm against cl_platform.h),
+// and generating plain integers and generating half-precision bit patterns are different operations.
+template <class type> // overload selected by enable_if when type is cl_half
+std::vector<type> vload_vstore_generate_input(size_t count,
+                                              const type& min,
+                                              const type& max, 
+                                              const std::vector<type> special_cases,
+                                              const bool generate_half, // true: produce half-float bit patterns; false: plain values of type
+                                              typename std::enable_if<
+                                                  std::is_same<type, cl_half>::value
+                                              >::type* = 0)
+    if(!generate_half)
+    {
+        return generate_input<type>(count, min, max, special_cases);
+    }
+    return generate_half_input(count, -(CL_HALF_MAX/4.f), (CL_HALF_MAX/4.f), special_cases); // min/max are ignored on this path; range is fixed to +/- CL_HALF_MAX/4
+// If !std::is_same<type, cl_half>::value, we can just use generate_input<type>(...).
+template <class type> // overload selected by enable_if when type is not cl_half
+std::vector<type> vload_vstore_generate_input(size_t count,
+                                              const type& min,
+                                              const type& max, 
+                                              const std::vector<type> special_cases,
+                                              const bool generate_half, // not used by this overload
+                                              typename std::enable_if<
+                                                  !std::is_same<type, cl_half>::value
+                                              >::type* = 0)
+    return generate_input<type>(count, min, max, special_cases);
diff --git a/test_conformance/clcpp/vload_vstore/half_utils.hpp b/test_conformance/clcpp/vload_vstore/half_utils.hpp
new file mode 100644
index 0000000..5c60599
--- /dev/null
+++ b/test_conformance/clcpp/vload_vstore/half_utils.hpp
@@ -0,0 +1,136 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+#include <cmath>
+namespace detail 
+template<class INT_TYPE>
+inline int clz(INT_TYPE x) // Counts the zero bits above the highest set bit of x, within the width of INT_TYPE
+    int count = 0;
+    if(std::is_unsigned<INT_TYPE>::value)
+    {
+        cl_ulong value = x;
+        value <<= 8 * sizeof(value) - (8 * sizeof(x)); // move x's most significant bit to the top bit of value
+        for(count = 0; 0 == (value & (CL_LONG_MIN)); count++) // CL_LONG_MIN serves as the top-bit mask (presumably 0x8000000000000000); NOTE(review): never terminates for x == 0
+        {
+            value <<= 1;
+        }
+    }
+    else
+    {            
+        cl_long value = x;
+        value <<= 8 * sizeof(value) - (8 * sizeof(x)); // NOTE(review): left-shifting a negative signed value - confirm this is acceptable on the supported compilers
+        for(count = 0; 0 == (value & (CL_LONG_MIN)); count++) // same scan as the unsigned branch
+        {
+            value <<= 1;
+        }
+    }
+    return count;
+} // namespace detail 
+inline cl_float half2float(cl_half us) // Expands a 16-bit half-precision bit pattern into a cl_float
+    uint32_t u = us;
+    uint32_t sign = (u << 16) & 0x80000000; // sign moved from bit 15 to bit 31
+    int32_t exponent = (u & 0x7c00) >> 10; // 5-bit biased exponent
+    uint32_t mantissa = (u & 0x03ff) << 13; // 10-bit mantissa moved up into float's 23-bit mantissa field
+    union{ cl_uint u; cl_float f;}uu; // used to reinterpret the assembled bits as a float
+    if( exponent == 0 ) // zero or half subnormal
+    {
+        if( mantissa == 0 )
+            return sign ? -0.0f : 0.0f;
+        int shift = detail::clz( mantissa ) - 8; // distance needed to bring the leading 1 up to bit 23
+        exponent -= shift-1;
+        mantissa <<= shift;
+        mantissa &= 0x007fffff; // drop the now-implicit leading 1
+    }
+    else
+        if( exponent == 31) // all-ones exponent: infinity or NaN
+        {
+            uu.u = mantissa | sign;
+            if( mantissa )
+                uu.u |= 0x7fc00000; // NaN: float exponent all ones plus the top mantissa bit
+            else
+                uu.u |= 0x7f800000; // infinity
+            return uu.f;
+        }
+    exponent += 127 - 15; // rebias from half (15) to float (127)
+    exponent <<= 23;
+    exponent |= mantissa;
+    uu.u = exponent | sign;
+    return uu.f;
+inline cl_ushort float2half_rte(cl_float f) // Converts a cl_float to a 16-bit half bit pattern (the name and the tie comment below indicate round-to-nearest-even)
+    union{ cl_float f; cl_uint u; } u = {f};
+    cl_uint sign = (u.u >> 16) & 0x8000; // sign moved from bit 31 to bit 15
+    cl_float x = fabsf(f);
+    //Nan
+    if( x != x ) // true only when x is NaN
+    {
+        u.u >>= (24-11); // shift by 13: float mantissa width minus half mantissa width
+        u.u &= 0x7fff;
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+    // overflow
+    if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) )
+        return 0x7c00 | sign; // signed infinity
+    // underflow
+    if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) )
+        return sign;    // The halfway case can return 0x0001 or 0. 0 is even.
+    // very small
+    if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
+        return sign | 1; // smallest half subnormal
+    // half denormal
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125);
+        return sign | u.u;
+    }
+    u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13); // NOTE(review): these steps appear to round x by adding and subtracting a power of two derived from its exponent - confirm
+    u.u &= 0x7f800000; // keep only the exponent field
+    x += u.f;
+    u.f = x - u.f;
+    u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);
+    return (u.u >> (24-11)) | sign; // shift right by 13 and merge the sign
diff --git a/test_conformance/clcpp/vload_vstore/main.cpp b/test_conformance/clcpp/vload_vstore/main.cpp
new file mode 100644
index 0000000..3893905
--- /dev/null
+++ b/test_conformance/clcpp/vload_vstore/main.cpp
@@ -0,0 +1,25 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "vload_funcs.hpp"
+#include "vstore_funcs.hpp"
+int main(int argc, const char *argv[]) // Entry point: passes the global test suite's test_defs to runTestHarness
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(argc, argv, tests.size(),, false, false, 0); // NOTE(review): an argument is missing after tests.size() in this patch text - confirm against upstream
diff --git a/test_conformance/clcpp/vload_vstore/vload_funcs.hpp b/test_conformance/clcpp/vload_vstore/vload_funcs.hpp
new file mode 100644
index 0000000..f0bbcfc
--- /dev/null
+++ b/test_conformance/clcpp/vload_vstore/vload_funcs.hpp
@@ -0,0 +1,363 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <iterator>
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+#include "common.hpp"
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+template <class func_type, class in_type, class out_type, size_t N> // OpenCL C variant (sits under the DEVELOPMENT && USE_OPENCLC_KERNELS #if above); N = vector width
+std::string generate_kernel_vload(func_type func) // Returns kernel source that stores func.str()N(gid, input) into output[gid]
+    std::string input1_type_str = type_name<in_type>();
+    if(func.is_in1_half())
+    {
+        input1_type_str = "half"; // the kernel sees the input as half rather than the host-side type name
+    }
+    std::string output1_type_str = type_name<out_type>();
+    if(N == 3)
+    {
+        output1_type_str[output1_type_str.size() - 1] = '3'; // force the trailing digit to '3' (presumably type_name reports a different width for 3-vectors - confirm)
+    }
+    return
+        "__kernel void test_" + func.str() + "(global " + input1_type_str + " *input, global " + output1_type_str + " *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    output[gid] = " + func.str() + std::to_string(N) + "(gid, input);\n"
+        "}\n";
+template <class func_type, class in_type, class out_type, size_t N> // OpenCL C++ variant; N = vector width
+std::string generate_kernel_vload(func_type func) // Returns kernel source that stores func.str()<N>(gid, input.get()) into output[gid]
+    std::string input1_type_str = type_name<in_type>();
+    if(func.is_in1_half())
+    {
+        input1_type_str = "half"; // the kernel sees the input as half rather than the host-side type name
+    }
+    std::string output1_type_str = type_name<out_type>();
+    if(N == 3)
+    {
+        output1_type_str[output1_type_str.size() - 1] = '3'; // force the trailing digit to '3' (presumably type_name reports a different width for 3-vectors - confirm)
+    }
+    return
+        "" + func.defs() +
+        "" + func.headers() +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_" + func.str() + "(global_ptr<" + input1_type_str +  "[]> input,"
+                                              "global_ptr<" + output1_type_str + "[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    output[gid] = " + func.str() + "<" + std::to_string(N) + ">(gid, input.get());\n"
+        "}\n";
+template<class INPUT, class OUTPUT, class vload_op> // Host-side check of device vload results, compared lane by lane.
+bool verify_vload(const std::vector<INPUT> &in, const std::vector<OUTPUT> &out, vload_op op)
+    for(size_t i = 0; i < out.size(); i++) // one iteration per output vector
+    {
+        auto expected = op(i, in.begin()); // host reference: the vector the functor loads at offset i
+        for(size_t j = 0; j < vload_op::vector_size; j++)
+        {
+            size_t idx = (i * vector_size<OUTPUT>::value) + j; // flat index of lane j of vector i in the scalar input
+            if(!are_equal(expected.s[j], out[i].s[j],[idx], expected.s[j]), op)) // NOTE(review): argument list looks truncated in this patch text - confirm against upstream
+            {
+                print_error_msg(expected, out[i], i, op);
+                return false; // stop at the first mismatching lane
+            }
+        }
+    }
+    return true;
+template <class vload_op> // Runs one vload test: build kernel, upload scalar input, execute, read back vectors, verify.
+int test_vload_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, vload_op op)
+    cl_mem buffers[2]; // [0] = scalar input, [1] = vector output (see the clSetKernelArg calls below)
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int err;
+    typedef typename vload_op::in_type INPUT;
+    typedef typename vload_op::out_type OUTPUT;
+    // Don't run test for unsupported types
+    if(!(type_supported<INPUT>(device) && type_supported<OUTPUT>(device)))
+    {
+        return CL_SUCCESS; // an unsupported type combination is reported as success, not as a failure
+    }
+    std::string code_str = generate_kernel_vload<vload_op, INPUT, OUTPUT, vload_op::vector_size>(op);
+    std::string kernel_name("test_"); kernel_name += op.str(); // matches the "test_" + func.str() name emitted by generate_kernel_vload
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation (NOTE(review): no opening #if / #else / #endif for these branches is visible in this patch text)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    std::vector<INPUT> input = vload_vstore_generate_input<INPUT>(
+        count * vector_size<OUTPUT>::value, op.min1(), op.max1(), op.in_special_cases(), op.is_in1_half() // vector_size<OUTPUT>::value scalars per output vector
+    );
+    std::vector<OUTPUT> output = generate_output<OUTPUT>(count);
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(OUTPUT) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(), // CL_TRUE: blocking write
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the cast operand is missing in this patch text - confirm against upstream
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); // statuses are OR-ed so a single check covers both calls
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+    work_size[0] = count; // 1-D global size: one work-item per output vector
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), // CL_TRUE: blocking read
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    if (!verify_vload(input, output, op))
+    {
+        RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", // NOTE(review): if this macro returns early (definition not visible here), the clRelease* calls below are skipped - confirm
+            op.str().c_str(),
+            type_name<OUTPUT>().c_str(),
+            type_name<INPUT>().c_str()
+        );
+    }
+    log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err; // last assigned from clEnqueueReadBuffer above
+template <class IN1, cl_int N /* Vector size */>
+struct vload_func : public unary_func< // Host reference for "vload": gathers N consecutive IN1 scalars into one vector
+                        IN1,
+                        typename make_vector_type<IN1, N>::type /* create IN1N type */
+                    >
+    typedef typename make_vector_type<IN1, N>::type result_type;
+    const static size_t vector_size = N;
+    std::string str()
+    {
+        return "vload"; // function name pasted into the generated kernel
+    }
+    std::string headers()
+    {
+        return "#include <opencl_vector_load_store>\n";
+    }
+    template<class Iterator>
+    result_type operator()(const size_t offset, Iterator x) // x: start of the scalar input; offset: index of the vector to load
+    {
+        static_assert(
+            !is_vector_type<IN1>::value,
+            "IN1 must be scalar type"
+        );
+        static_assert(
+            std::is_same<typename std::iterator_traits<Iterator>::value_type, IN1>::value,
+            "std::iterator_traits<Iterator>::value_type must be IN1"
+        );
+        typedef typename std::iterator_traits<Iterator>::difference_type diff_type;
+        result_type r;
+        Iterator temp = x + static_cast<diff_type>(offset * N); // vectors are packed back to back, N scalars each
+        for(size_t i = 0; i < N; i++)
+        {
+            r.s[i] = *temp;
+            temp++;
+        }
+        return r;
+    }
+    bool is_in1_half()
+    {
+        return false; // input is plain IN1 data, not half-encoded
+    }
+template <cl_int N /* Vector size */>
+struct vload_half_func : public unary_func< // Host reference for "vload_half": reads N cl_half values and widens each to float
+                            cl_half,
+                            typename make_vector_type<cl_float, N>::type /* create IN1N type */
+                         >
+    typedef typename make_vector_type<cl_float, N>::type result_type;
+    const static size_t vector_size = N;
+    std::string str()
+    {
+        return "vload_half"; // function name pasted into the generated kernel
+    }
+    std::string headers()
+    {
+        return "#include <opencl_vector_load_store>\n";
+    }
+    template<class Iterator>
+    result_type operator()(const size_t offset, Iterator x) // x: start of the cl_half input; offset: index of the vector to load
+    {
+        static_assert(
+            std::is_same<typename std::iterator_traits<Iterator>::value_type, cl_half>::value,
+            "std::iterator_traits<Iterator>::value_type must be cl_half"
+        );
+        typedef typename std::iterator_traits<Iterator>::difference_type diff_type;
+        result_type r;
+        Iterator temp = x + static_cast<diff_type>(offset * N); // vectors are packed back to back, N halves each
+        for(size_t i = 0; i < N; i++)
+        {
+            r.s[i] = half2float(*temp);
+            temp++;
+        }
+        return r;
+    }
+    bool is_in1_half()
+    {
+        return true; // input data are half-encoded values
+    }
+template <cl_int N /* Vector size */>
+struct vloada_half_func : public unary_func< // Host reference for "vloada_half": like vload_half_func, but 3-vectors use a stride of 4
+                            cl_half,
+                            typename make_vector_type<cl_float, N>::type /* create IN1N type */
+                         >
+    typedef typename make_vector_type<cl_float, N>::type result_type;
+    const static size_t vector_size = N;
+    std::string str()
+    {
+        return "vloada_half"; // function name pasted into the generated kernel
+    }
+    std::string headers()
+    {
+        return "#include <opencl_vector_load_store>\n";
+    }
+    template<class Iterator>
+    result_type operator()(const size_t offset, Iterator x) // x: start of the cl_half input; offset: index of the vector to load
+    {
+        static_assert(
+            std::is_same<typename std::iterator_traits<Iterator>::value_type, cl_half>::value,
+            "std::iterator_traits<Iterator>::value_type must be cl_half"
+        );
+        typedef typename std::iterator_traits<Iterator>::difference_type diff_type;
+        result_type r;
+        size_t alignment = N == 3 ? 4 : N; // element stride between consecutive vectors: 4 when N is 3, otherwise N
+        Iterator temp = x + static_cast<diff_type>(offset * alignment);
+        for(size_t i = 0; i < N; i++) // still reads only N elements per vector
+        {
+            r.s[i] = half2float(*temp);
+            temp++;
+        }
+        return r;
+    }
+    bool is_in1_half()
+    {
+        return true; // input data are half-encoded values
+    }
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // NOTE(review): the test-case name/macro before this parameter list is missing in this patch text
+    int error = CL_SUCCESS; // accumulated status over all sub-tests
+    int last_error = CL_SUCCESS; // status of the most recent sub-test
+    last_error = test_vload_func( \
+        device, context, queue, n_elems, CLASS \
+    ); \
+    CHECK_ERROR(last_error) \
+    error |= last_error; // NOTE(review): lines ending in '\' above look like the body of TEST_VLOAD_FUNC_MACRO(CLASS); its #define line is not visible here
+    TEST_VLOAD_FUNC_MACRO((vload_func<cl_uint,  2>()))
+    TEST_VLOAD_FUNC_MACRO((vload_func<cl_float, 4>()))
+    TEST_VLOAD_FUNC_MACRO((vload_func<cl_short, 8>()))
+    TEST_VLOAD_FUNC_MACRO((vload_func<cl_int, 16>()))
+    TEST_VLOAD_FUNC_MACRO((vload_half_func<2>()))
+    TEST_VLOAD_FUNC_MACRO((vload_half_func<3>()))
+    TEST_VLOAD_FUNC_MACRO((vload_half_func<4>()))
+    TEST_VLOAD_FUNC_MACRO((vload_half_func<8>()))
+    TEST_VLOAD_FUNC_MACRO((vload_half_func<16>()))
+    TEST_VLOAD_FUNC_MACRO((vloada_half_func<2>()))
+    TEST_VLOAD_FUNC_MACRO((vloada_half_func<3>()))
+    TEST_VLOAD_FUNC_MACRO((vloada_half_func<4>()))
+    TEST_VLOAD_FUNC_MACRO((vloada_half_func<8>()))
+    TEST_VLOAD_FUNC_MACRO((vloada_half_func<16>()))
+    if(error != CL_SUCCESS)
+    {
+        return -1; // any accumulated failure is reported as -1
+    }
+    return error; // only reached when error == CL_SUCCESS
diff --git a/test_conformance/clcpp/vload_vstore/vstore_funcs.hpp b/test_conformance/clcpp/vload_vstore/vstore_funcs.hpp
new file mode 100644
index 0000000..5bf83f5
--- /dev/null
+++ b/test_conformance/clcpp/vload_vstore/vstore_funcs.hpp
@@ -0,0 +1,348 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+#include <iterator>
+#include "../common.hpp"
+#include "../funcs_test_utils.hpp"
+#include "common.hpp"
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Builds the OpenCL C source of the kernel "test_<func.str()>".
+// Every work-item passes input[gid] to <func.str()><N>(value, gid, output).
+//   func - functor describing the store function; str() and is_out_half() are used.
+// Returns the kernel source as a string.
+template <class func_type, class in_type, class out_type, size_t N>
+std::string generate_kernel_vstore(func_type func)
+{
+    std::string input1_type_str = type_name<in_type>();
+    // For N == 3 the last character of the type name is overwritten with '3'.
+    // NOTE(review): presumably because the host 3-component type reports a
+    // 4-component name -- confirm against type_name<>.
+    if(N == 3 && !input1_type_str.empty())
+    {
+        input1_type_str[input1_type_str.size() - 1] = '3';
+    }
+    std::string output1_type_str = type_name<out_type>();
+    if(func.is_out_half())
+    {
+        output1_type_str = "half";
+    }
+    return
+        "__kernel void test_" + func.str() + "(global " + input1_type_str + " *input, global " + output1_type_str + " *output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    " + func.str() + std::to_string(N) + "(input[gid], gid, output);\n"
+        "}\n";
+}
+#else
+// Builds the OpenCL C++ source of the kernel "test_<func.str()>".
+// The source starts with func.defs() and func.headers(); every work-item
+// passes input[gid] to <func.str()>(value, gid, output.get()).
+//   func - functor describing the store function; defs(), headers(), str()
+//          and is_out_half() are used.
+// Returns the kernel source as a string.
+template <class func_type, class in_type, class out_type, size_t N>
+std::string generate_kernel_vstore(func_type func)
+{
+    std::string input1_type_str = type_name<in_type>();
+    // For N == 3 the last character of the type name is overwritten with '3'.
+    // NOTE(review): presumably because the host 3-component type reports a
+    // 4-component name -- confirm against type_name<>.
+    if(N == 3 && !input1_type_str.empty())
+    {
+        input1_type_str[input1_type_str.size() - 1] = '3';
+    }
+    std::string output1_type_str = type_name<out_type>();
+    if(func.is_out_half())
+    {
+        output1_type_str = "half";
+    }
+    return
+        "" + func.defs() +
+        "" + func.headers() +
+        "#include <opencl_memory>\n"
+        "#include <opencl_work_item>\n"
+        "using namespace cl;\n"
+        "__kernel void test_" + func.str() + "(global_ptr<" + input1_type_str +  "[]> input,"
+                                              "global_ptr<" + output1_type_str + "[]> output)\n"
+        "{\n"
+        "    size_t gid = get_global_id(0);\n"
+        "    " + func.str() + "(input[gid], gid, output.get());\n"
+        "}\n";
+}
+#endif
+// Compares device output against host-computed expectations.
+// For every input vector in[i], op(in[i]) gives the expected result; its
+// components are compared with out[] starting at i * vload_op::vec_alignment.
+//   in  - host copy of the input vectors
+//   out - flat host copy of the output buffer
+//   op  - functor providing operator(), delta(), vector_size and vec_alignment
+// Returns true when every component matches; on the first mismatch an error
+// message is printed and false is returned.
+template<class INPUT, class OUTPUT, class vload_op>
+bool verify_vstore(const std::vector<INPUT> &in, const std::vector<OUTPUT> &out, vload_op op)
+{
+    for(size_t i = 0; i < in.size(); i++)
+    {
+        auto expected = op(in[i]);
+        for(size_t j = 0; j < vload_op::vector_size; j++)
+        {
+            // vec_alignment may exceed vector_size, so output vectors can be padded.
+            size_t idx = (i * vload_op::vec_alignment) + j;
+            if(!are_equal(expected.s[j], out[idx], op.delta(in[i], expected).s[j], op))
+            {
+                print_error_msg(expected.s[j], out[idx], idx, op);
+                return false;
+            }
+        }
+    }
+    return true;
+}
+// Builds and runs the kernel for one vstore-style functor and verifies the result.
+//   device, context, queue - OpenCL objects the test runs on
+//   count                  - number of input vectors (one work-item each)
+//   op                     - functor describing the function under test
+// Returns CL_SUCCESS when the test passes or when the input/output types are
+// not supported by the device; otherwise a non-success status.
+template <class vload_op>
+int test_vstore_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, vload_op op)
+{
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t work_size[1];
+    int err;
+
+    typedef typename vload_op::in_type INPUT;
+    typedef typename vload_op::out_type OUTPUT;
+
+    // Don't run test for unsupported types
+    if(!(type_supported<INPUT>(device) && type_supported<OUTPUT>(device)))
+    {
+        return CL_SUCCESS;
+    }
+
+    std::string code_str = generate_kernel_vstore<vload_op, INPUT, OUTPUT, vload_op::vector_size>(op);
+    std::string kernel_name("test_"); kernel_name += op.str();
+
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false);
+    // Do not go on with an unusable program/kernel.
+    if(err != CL_SUCCESS)
+    {
+        return err;
+    }
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name);
+    // Do not go on with an unusable program/kernel.
+    if(err != CL_SUCCESS)
+    {
+        return err;
+    }
+#endif
+
+    std::vector<INPUT> input = generate_input<INPUT>(count, op.min1(), op.max1(), op.in_special_cases());
+    std::vector<OUTPUT> output = generate_output<OUTPUT>(count * vector_size<INPUT>::value);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(OUTPUT) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    // Blocking write: the host vector is copied before the call returns.
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    work_size[0] = count;
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    // Blocking read: output holds the device results once the call returns.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    if (!verify_vstore(input, output, op))
+    {
+        RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+    }
+    log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name<OUTPUT>().c_str(), type_name<INPUT>().c_str());
+
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+}
+// Host-side description of vstoreN for scalar element type T and vector size N.
+// The expected output equals the input vector, component by component.
+template <class T, cl_int N /* Vector size */>
+struct vstore_func : public unary_func<
+                        typename make_vector_type<T, N>::type,
+                        T
+                     >
+{
+    typedef typename make_vector_type<T, N>::type input1_type;
+    typedef typename make_vector_type<T, N>::type result_type;
+    // Number of components compared per vector.
+    const static size_t vector_size = N;
+    // Distance, in elements, between consecutive vectors in the output buffer.
+    const static size_t vec_alignment = N;
+
+    // Name of the device function under test.
+    std::string str()
+    {
+        return "vstore";
+    }
+
+    // Header line the OpenCL C++ kernel source must include.
+    std::string headers()
+    {
+        return "#include <opencl_vector_load_store>\n";
+    }
+
+    // Reference result: the stored data is the input itself.
+    result_type operator()(const input1_type& in)
+    {
+        static_assert(
+            !is_vector_type<T>::value,
+            "T must be scalar type"
+        );
+        return in;
+    }
+
+    // The output buffer holds elements of type T, not half.
+    bool is_out_half()
+    {
+        return false;
+    }
+};
+// Host-side description of vstore_halfN: an N-component float vector is
+// converted to half values with float2half_rte and stored tightly packed.
+template <cl_int N /* Vector size */>
+struct vstore_half_func : public unary_func<
+                            typename make_vector_type<cl_float, N>::type,
+                            cl_half
+                          >
+{
+    typedef typename make_vector_type<cl_float, N>::type input1_type;
+    typedef typename make_vector_type<cl_half, N>::type result_type;
+    // Number of components compared per vector.
+    const static size_t vector_size = N;
+    // Distance, in elements, between consecutive vectors in the output buffer.
+    const static size_t vec_alignment = N;
+
+    // Name of the device function under test.
+    std::string str()
+    {
+        return "vstore_half";
+    }
+
+    // Header line the OpenCL C++ kernel source must include.
+    std::string headers()
+    {
+        return "#include <opencl_vector_load_store>\n";
+    }
+
+    // Reference result: each float component converted with float2half_rte.
+    result_type operator()(const input1_type& in)
+    {
+        result_type r;
+        for(size_t i = 0; i < N; i++)
+        {
+            r.s[i] = float2half_rte(in.s[i]);
+        }
+        return r;
+    }
+
+    // Lower bound passed to the input generator.
+    input1_type min1()
+    {
+        return detail::make_value<input1_type>(-512.f);
+    }
+
+    // Upper bound passed to the input generator.
+    input1_type max1()
+    {
+        return detail::make_value<input1_type>(512.f);
+    }
+
+    // The output buffer is declared as "half" in the generated kernel.
+    bool is_out_half()
+    {
+        return true;
+    }
+};
+// Host-side description of vstorea_halfN: like vstore_half, except that
+// 3-component vectors are placed 4 elements apart in the output buffer.
+template <cl_int N /* Vector size */>
+struct vstorea_half_func : public unary_func<
+                            typename make_vector_type<cl_float, N>::type,
+                            cl_half
+                          >
+{
+    typedef typename make_vector_type<cl_float, N>::type input1_type;
+    typedef typename make_vector_type<cl_half, N>::type result_type;
+    // Number of components compared per vector.
+    const static size_t vector_size = N;
+    // Distance, in elements, between consecutive vectors; 4 when N is 3.
+    const static size_t vec_alignment = N == 3 ? 4 : N;
+
+    // Name of the device function under test.
+    std::string str()
+    {
+        return "vstorea_half";
+    }
+
+    // Header line the OpenCL C++ kernel source must include.
+    std::string headers()
+    {
+        return "#include <opencl_vector_load_store>\n";
+    }
+
+    // Reference result: each float component converted with float2half_rte.
+    result_type operator()(const input1_type& in)
+    {
+        result_type r;
+        for(size_t i = 0; i < N; i++)
+        {
+            r.s[i] = float2half_rte(in.s[i]);
+        }
+        return r;
+    }
+
+    // Lower bound passed to the input generator.
+    input1_type min1()
+    {
+        return detail::make_value<input1_type>(-512.f);
+    }
+
+    // Upper bound passed to the input generator.
+    input1_type max1()
+    {
+        return detail::make_value<input1_type>(512.f);
+    }
+
+    // The output buffer is declared as "half" in the generated kernel.
+    bool is_out_half()
+    {
+        return true;
+    }
+};
+// Test entry point for the vstore / vstore_half / vstorea_half checks.
+// Each TEST_VSTORE_FUNC_MACRO(...) line runs test_vstore_func for one functor,
+// stores the status in last_error and ORs it into error; the function returns
+// -1 when the accumulated status differs from CL_SUCCESS.
+// NOTE(review): the line that names this test case, the enclosing braces and
+// the "#define TEST_VSTORE_FUNC_MACRO(CLASS) \" header that the
+// backslash-continued statements below belong to are not present in this
+// text -- confirm against the original file.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int error = CL_SUCCESS;
+    int last_error = CL_SUCCESS;
+    last_error = test_vstore_func( \
+        device, context, queue, n_elems, CLASS \
+    ); \
+    CHECK_ERROR(last_error) \
+    error |= last_error;
+    TEST_VSTORE_FUNC_MACRO((vstore_func<cl_uint, 2>()))
+    TEST_VSTORE_FUNC_MACRO((vstore_func<cl_uint, 3>()))
+    TEST_VSTORE_FUNC_MACRO((vstore_func<cl_int, 4>()))
+    TEST_VSTORE_FUNC_MACRO((vstore_func<cl_float, 8>()))
+    TEST_VSTORE_FUNC_MACRO((vstore_func<cl_uchar, 16>()))
+    TEST_VSTORE_FUNC_MACRO((vstore_half_func<2>()))
+    TEST_VSTORE_FUNC_MACRO((vstore_half_func<3>()))
+    TEST_VSTORE_FUNC_MACRO((vstore_half_func<4>()))
+    TEST_VSTORE_FUNC_MACRO((vstore_half_func<8>()))
+    TEST_VSTORE_FUNC_MACRO((vstore_half_func<16>()))
+    TEST_VSTORE_FUNC_MACRO((vstorea_half_func<2>()))
+    TEST_VSTORE_FUNC_MACRO((vstorea_half_func<3>()))
+    // Collapse any non-success status to the generic failure code -1.
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }
+    return error;
diff --git a/test_conformance/clcpp/workgroups/CMakeLists.txt b/test_conformance/clcpp/workgroups/CMakeLists.txt
new file mode 100644
index 0000000..812e982
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/workgroups/common.hpp b/test_conformance/clcpp/workgroups/common.hpp
new file mode 100644
index 0000000..ab7b100
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/common.hpp
@@ -0,0 +1,97 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <string>
+#include <vector>
+#include <limits>
+// Operation selector for the work-group reduce/scan tests.
+enum class work_group_op : int {
+    add, min, max
+};
+// Returns the lower-case name of op ("add", "min" or "max"), or an empty
+// string for a value outside the enumeration.
+std::string to_string(work_group_op op)
+{
+    switch (op)
+    {
+        case work_group_op::add:
+            return "add";
+        case work_group_op::min:
+            return "min";
+        case work_group_op::max:
+            return "max";
+        default:
+            break;
+    }
+    return "";
+}
+// Generates count input values for the work-group operation op.
+//   add - every element is 1
+//   min - values count down wg_size, wg_size-1, ..., 1 and then repeat
+//   max - values count up 0, 1, ..., wg_size-1 and then repeat
+// Returns the generated vector.
+template <class CL_INT_TYPE, work_group_op op>
+std::vector<CL_INT_TYPE> generate_input(size_t count, size_t wg_size)
+{
+    std::vector<CL_INT_TYPE> input(count, CL_INT_TYPE(1));
+    switch (op)
+    {
+        case work_group_op::add:
+            return input;
+        case work_group_op::min:
+            {
+                size_t j = wg_size;
+                for(size_t i = 0; i < count; i++)
+                {
+                    input[i] = static_cast<CL_INT_TYPE>(j);
+                    j--;
+                    // Restart the countdown every wg_size elements.
+                    if(j == 0)
+                    {
+                        j = wg_size;
+                    }
+                }
+            }
+            break;
+        case work_group_op::max:
+            {
+                size_t j = 0;
+                for(size_t i = 0; i < count; i++)
+                {
+                    input[i] = static_cast<CL_INT_TYPE>(j);
+                    j++;
+                    // Restart the count every wg_size elements.
+                    if(j == wg_size)
+                    {
+                        j = 0;
+                    }
+                }
+            }
+            break;
+        default:
+            break;
+    }
+    return input;
+}
+// Creates the initial content of an output buffer with count elements.
+// The fill value depends on op: 0 for add, the largest value of CL_INT_TYPE
+// for min, the smallest for max. wg_size is not used.
+template <class CL_INT_TYPE, work_group_op op>
+std::vector<CL_INT_TYPE> generate_output(size_t count, size_t wg_size)
+{
+    (void) wg_size;
+    switch (op)
+    {
+        case work_group_op::add:
+            return std::vector<CL_INT_TYPE>(count, CL_INT_TYPE(0));
+        case work_group_op::min:
+            // Parenthesised so a function-like max/min macro cannot expand here.
+            return std::vector<CL_INT_TYPE>(count, (std::numeric_limits<CL_INT_TYPE>::max)());
+        case work_group_op::max:
+            return std::vector<CL_INT_TYPE>(count, (std::numeric_limits<CL_INT_TYPE>::min)());
+        default:
+            break;
+    }
+    return std::vector<CL_INT_TYPE>(count, CL_INT_TYPE(0));
+}
diff --git a/test_conformance/clcpp/workgroups/main.cpp b/test_conformance/clcpp/workgroups/main.cpp
new file mode 100644
index 0000000..508753c
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/main.cpp
@@ -0,0 +1,29 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "test_wg_all.hpp"
+#include "test_wg_any.hpp"
+#include "test_wg_broadcast.hpp"
+#include "test_wg_reduce.hpp"
+#include "test_wg_scan_inclusive.hpp"
+#include "test_wg_scan_exclusive.hpp"
+// Program entry: hands every test registered in the global auto-test suite to
+// the test harness and returns the harness result.
+int main(int argc, const char *argv[])
+{
+    auto& tests = autotest::test_suite::global_test_suite().test_defs;
+    return runTestHarness(argc, argv, tests.size(), tests.data(), false, false, 0);
+}
diff --git a/test_conformance/clcpp/workgroups/test_wg_all.hpp b/test_conformance/clcpp/workgroups/test_wg_all.hpp
new file mode 100644
index 0000000..103ce2b
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/test_wg_all.hpp
@@ -0,0 +1,218 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <limits>
+#include <algorithm>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Returns the OpenCL C source of kernel "test_wg_all": every work-item writes
+// 1 when work_group_all(input[tid] < input[tid+1]) is non-zero, otherwise 0.
+std::string generate_wg_all_kernel_code()
+{
+    return
+        "__kernel void test_wg_all(global uint *input, global uint *output)\n"
+        "{\n"
+        "    ulong tid = get_global_id(0);\n"
+        "\n"
+        "    int result = work_group_all(input[tid] < input[tid+1]);\n"
+        "    if(result == 0) {\n        output[tid] = 0;\n        return;\n    }\n"
+        "    output[tid] = 1;\n"
+        "}\n";
+}
+#else
+// Returns the OpenCL C++ source of kernel "test_wg_all": every work-item writes
+// 1 when work_group_all(input[tid] < input[tid+1]) is true, otherwise 0.
+std::string generate_wg_all_kernel_code()
+{
+    return "#include <opencl_memory>\n"
+           "#include <opencl_work_item>\n"
+           "#include <opencl_work_group>\n"
+           "using namespace cl;\n"
+           "__kernel void test_wg_all(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+           "{\n"
+           "    ulong tid = get_global_id(0);\n"
+           "    bool result = work_group_all(input[tid] < input[tid+1]);\n"
+           "    if(!result) {\n        output[tid] = 0;\n        return;\n    }\n"
+           "    output[tid] = 1;\n"
+           "}\n";
+}
+#endif
+// Checks the output of the work_group_all kernel on the host.
+// For each work-group of wg_size items the expected value is 1 when
+// in[k] < in[k+1] holds for every item k of the group, else 0; every item of
+// the group must have stored that value.
+//   in      - input values; element in[k+1] is read for every k < count
+//   out     - values stored by the kernel
+//   count   - number of work-items
+//   wg_size - work-group size used for the launch
+// Returns CL_SUCCESS when all values match, -1 on the first mismatch.
+int verify_wg_all(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size)
+{
+    for (size_t i = 0; i < count; i += wg_size)
+    {
+        // The last work-group may hold fewer than wg_size items.
+        const size_t group_len = (count - i) > wg_size ? wg_size : (count - i);
+
+        // Work-group all
+        bool all = true;
+        for (size_t j = 0; j < group_len; j++)
+        {
+            if(!(in[i+j] < in[i+j+1]))
+            {
+                all = false;
+                break;
+            }
+        }
+
+        // Convert bool to uint
+        cl_uint all_uint = all ? 1 : 0;
+
+        // Check if all work-items in work-group stored correct value
+        for (size_t j = 0; j < group_len; j++)
+        {
+            if (all_uint != out[i + j])
+            {
+                // %zu is the conversion for size_t; %lu mismatches where
+                // unsigned long is narrower than size_t.
+                log_info(
+                    "work_group_all %s: Error at %zu: expected = %zu, got = %zu\n",
+                    type_name<cl_uint>().c_str(),
+                    i + j,
+                    static_cast<size_t>(all_uint),
+                    static_cast<size_t>(out[i + j]));
+                return -1;
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+// Generates count input values for the work_group_all test.
+// Values are the increasing sequence 0, 1, 2, ...; in the second half of the
+// buffer one element per wg_size-sized stretch repeats its predecessor.
+std::vector<cl_uint> generate_input_wg_all(size_t count, size_t wg_size)
+{
+    std::vector<cl_uint> input(count, cl_uint(0));
+    size_t j = wg_size;
+    for(size_t i = 0; i < count; i++)
+    {
+        input[i] = static_cast<cl_uint>(i);
+        // In one place in ~half of the work-groups input[tid] < input[tid+1]
+        // evaluates to false, so for those work-groups work_group_all()
+        // should return false.
+        if((j == wg_size/2) && (i > count/2))
+        {
+            input[i] = input[i - 1];
+        }
+        j--;
+        if(j == 0)
+        {
+            j = wg_size;
+        }
+    }
+    return input;
+}
+// Creates the initial output buffer: count elements, all set to 1.
+// wg_size is accepted for signature symmetry and not used.
+std::vector<cl_uint> generate_output_wg_all(size_t count, size_t wg_size)
+{
+    (void) wg_size;
+    return std::vector<cl_uint>(count, cl_uint(1));
+}
+// Builds and runs the work_group_all kernel and verifies its output.
+//   device, context, queue - OpenCL objects the test runs on
+//   count                  - requested number of work-items; rounded up to a
+//                            multiple of the kernel's work-group size
+// Returns CL_SUCCESS on success, otherwise a non-success status.
+int work_group_all(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str = generate_wg_all_kernel_code();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all");
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all", "-cl-std=CL2.0", false);
+    // Do not go on with an unusable program/kernel.
+    if(err != CL_SUCCESS)
+    {
+        return err;
+    }
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all");
+    // Do not go on with an unusable program/kernel.
+    if(err != CL_SUCCESS)
+    {
+        return err;
+    }
+#endif
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Calculate global work size: round count up to whole work-groups using
+    // integer arithmetic.
+    size_t wg_number = (count + wg_size - 1) / wg_size;
+    size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    // One extra input element: the kernel reads input[tid+1].
+    std::vector<cl_uint> input = generate_input_wg_all(flat_work_size + 1, wg_size);
+    std::vector<cl_uint> output = generate_output_wg_all(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    if (verify_wg_all(input, output, flat_work_size, wg_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "work_group_all failed");
+    }
+    log_info("work_group_all passed\n");
+
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+}
+// Test entry point: runs work_group_all with n_elems work-items and maps any
+// non-success status to -1.
+// NOTE(review): the line that names this test case and the enclosing braces
+// are not present in this text -- confirm against the original file.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int err = CL_SUCCESS;
+    err = work_group_all(device, context, queue, n_elems);
+    CHECK_ERROR(err)
+    if(err != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
diff --git a/test_conformance/clcpp/workgroups/test_wg_any.hpp b/test_conformance/clcpp/workgroups/test_wg_any.hpp
new file mode 100644
index 0000000..724b3ce
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/test_wg_any.hpp
@@ -0,0 +1,218 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <limits>
+#include <algorithm>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Returns the OpenCL C source of kernel "test_wg_any": every work-item writes
+// 1 when work_group_any(input[tid] == input[tid+1]) is non-zero, otherwise 0.
+std::string generate_wg_any_kernel_code()
+{
+    return
+        "__kernel void test_wg_any(global uint *input, global uint *output)\n"
+        "{\n"
+        "    ulong tid = get_global_id(0);\n"
+        "\n"
+        "    int result = work_group_any(input[tid] == input[tid+1]);\n"
+        "    if(result == 0) {\n        output[tid] = 0;\n        return;\n    }\n"
+        "    output[tid] = 1;\n"
+        "}\n";
+}
+#else
+// Returns the OpenCL C++ source of kernel "test_wg_any": every work-item writes
+// 1 when work_group_any(input[tid] == input[tid+1]) is true, otherwise 0.
+std::string generate_wg_any_kernel_code()
+{
+    return "#include <opencl_memory>\n"
+           "#include <opencl_work_item>\n"
+           "#include <opencl_work_group>\n"
+           "using namespace cl;\n"
+           "__kernel void test_wg_any(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+           "{\n"
+           "    ulong tid = get_global_id(0);\n"
+           "    bool result = work_group_any(input[tid] == input[tid+1]);\n"
+           "    if(!result) {\n        output[tid] = 0;\n        return;\n    }\n"
+           "    output[tid] = 1;\n"
+           "}\n";
+}
+#endif
+// Checks the output of the work_group_any kernel on the host.
+// For each work-group of wg_size items the expected value is 1 when
+// in[k] == in[k+1] holds for at least one item k of the group, else 0; every
+// item of the group must have stored that value.
+//   in      - input values; element in[k+1] is read for every k < count
+//   out     - values stored by the kernel
+//   count   - number of work-items
+//   wg_size - work-group size used for the launch
+// Returns CL_SUCCESS when all values match, -1 on the first mismatch.
+int verify_wg_any(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t count, size_t wg_size)
+{
+    for (size_t i = 0; i < count; i += wg_size)
+    {
+        // The last work-group may hold fewer than wg_size items.
+        const size_t group_len = (count - i) > wg_size ? wg_size : (count - i);
+
+        // Work-group any
+        bool any = false;
+        for (size_t j = 0; j < group_len; j++)
+        {
+            if(in[i+j] == in[i+j+1])
+            {
+                any = true;
+                break;
+            }
+        }
+
+        // Convert bool to uint
+        cl_uint any_uint = any ? 1 : 0;
+
+        // Check if all work-items in work-group stored correct value
+        for (size_t j = 0; j < group_len; j++)
+        {
+            if (any_uint != out[i + j])
+            {
+                // %zu is the conversion for size_t; %lu mismatches where
+                // unsigned long is narrower than size_t.
+                log_info(
+                    "work_group_any %s: Error at %zu: expected = %zu, got = %zu\n",
+                    type_name<cl_uint>().c_str(),
+                    i + j,
+                    static_cast<size_t>(any_uint),
+                    static_cast<size_t>(out[i + j]));
+                return -1;
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+// Generates count input values for the work_group_any test.
+// Values are the increasing sequence 0, 1, 2, ...; in the second half of the
+// buffer one element per wg_size-sized stretch repeats its predecessor.
+std::vector<cl_uint> generate_input_wg_any(size_t count, size_t wg_size)
+{
+    std::vector<cl_uint> input(count, cl_uint(0));
+    size_t j = wg_size;
+    for(size_t i = 0; i < count; i++)
+    {
+        input[i] = static_cast<cl_uint>(i);
+        // In one place in ~half of the work-groups input[tid] == input[tid+1]
+        // evaluates to true, so for those work-groups work_group_any()
+        // should return true.
+        if((j == wg_size/2) && (i > count/2))
+        {
+            input[i] = input[i - 1];
+        }
+        j--;
+        if(j == 0)
+        {
+            j = wg_size;
+        }
+    }
+    return input;
+}
+// Creates the initial output buffer: count elements, all set to 1.
+// wg_size is accepted for signature symmetry and not used.
+std::vector<cl_uint> generate_output_wg_any(size_t count, size_t wg_size)
+{
+    (void) wg_size;
+    return std::vector<cl_uint>(count, cl_uint(1));
+}
+// Builds and runs the work_group_any kernel and verifies its output.
+//   device, context, queue - OpenCL objects the test runs on
+//   count                  - requested number of work-items; rounded up to a
+//                            multiple of the kernel's work-group size
+// Returns CL_SUCCESS on success, otherwise a non-success status.
+int work_group_any(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t work_size[1];
+    int err;
+
+    std::string code_str = generate_wg_any_kernel_code();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_any");
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_any", "-cl-std=CL2.0", false);
+    // Do not go on with an unusable program/kernel.
+    if(err != CL_SUCCESS)
+    {
+        return err;
+    }
+#else
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_any");
+    // Do not go on with an unusable program/kernel.
+    if(err != CL_SUCCESS)
+    {
+        return err;
+    }
+#endif
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+    // Calculate global work size: round count up to whole work-groups using
+    // integer arithmetic.
+    size_t wg_number = (count + wg_size - 1) / wg_size;
+    size_t flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+
+    // One extra input element: the kernel reads input[tid+1].
+    std::vector<cl_uint> input = generate_input_wg_any(flat_work_size + 1, wg_size);
+    std::vector<cl_uint> output = generate_output_wg_any(flat_work_size, wg_size);
+
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
+        static_cast<void *>(input.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(output.data()), 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+
+    if (verify_wg_any(input, output, flat_work_size, wg_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "work_group_any failed");
+    }
+    log_info("work_group_any passed\n");
+
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+}
+// Test entry point: runs work_group_any with n_elems work-items and maps any
+// non-success status to -1.
+// NOTE(review): the line that names this test case and the enclosing braces
+// are not present in this text -- confirm against the original file.
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    int err = CL_SUCCESS;
+    err = work_group_any(device, context, queue, n_elems);
+    CHECK_ERROR(err)
+    if(err != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
diff --git a/test_conformance/clcpp/workgroups/test_wg_broadcast.hpp b/test_conformance/clcpp/workgroups/test_wg_broadcast.hpp
new file mode 100644
index 0000000..4dc5559
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/test_wg_broadcast.hpp
@@ -0,0 +1,458 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <limits>
+#include <algorithm>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Returns the OpenCL C source of the 1D broadcast kernel (the variant placed under
+// DEVELOPMENT && USE_OPENCLC_KERNELS). Each work-item writes to output[tid] the
+// result of work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0)).
+std::string generate_wg_broadcast_1D_kernel_code()
+    return
+        "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n"
+        "{\n"
+        "    ulong tid = get_global_id(0);\n"
+        "    uint result = work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0));\n"
+        "    output[tid] = result;\n"
+        "}\n";
+// Returns the OpenCL C source of the 2D broadcast kernel. The broadcasting
+// work-item is selected per dimension as get_group_id(d) % get_local_size(d), and
+// buffers are indexed row-major: idx = tid_y * get_global_size(0) + tid_x.
+std::string generate_wg_broadcast_2D_kernel_code()
+    return
+        "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n"
+        "{\n"
+        "    ulong tid_x = get_global_id(0);\n"
+        "    ulong tid_y = get_global_id(1);\n"
+        "    size_t x = get_group_id(0) % get_local_size(0);\n"
+        "    size_t y = get_group_id(1) % get_local_size(1);\n"
+        "    size_t idx = (tid_y * get_global_size(0)) + tid_x;\n"
+        "    uint result = work_group_broadcast(input[idx], x, y);\n"
+        "    output[idx] = result;\n"
+        "}\n";
+// Returns the OpenCL C source of the 3D broadcast kernel. The broadcasting
+// work-item is selected per dimension as get_group_id(d) % get_local_size(d), and
+// buffers are indexed as
+// idx = tid_z * get_global_size(1) * get_global_size(0) + tid_y * get_global_size(0) + tid_x.
+std::string generate_wg_broadcast_3D_kernel_code()
+    return
+        "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n"
+        "{\n"
+        "    ulong tid_x = get_global_id(0);\n"
+        "    ulong tid_y = get_global_id(1);\n"
+        "    ulong tid_z = get_global_id(2);\n"
+        "    size_t x = get_group_id(0) % get_local_size(0);\n"
+        "    size_t y = get_group_id(1) % get_local_size(1);\n"
+        "    size_t z = get_group_id(2) % get_local_size(2);\n"
+        "    ulong idx = (tid_z * get_global_size(1) * get_global_size(0)) + (tid_y * get_global_size(0)) + tid_x;\n"
+        "    uint result = work_group_broadcast(input[idx], x, y, z);\n"
+        "    output[idx] = result;\n"
+        "}\n";
+// Returns the OpenCL C++ source of the 1D broadcast kernel: it includes the
+// <opencl_memory>, <opencl_work_item> and <opencl_work_group> headers, uses
+// namespace cl and takes its buffers as global_ptr<uint[]>. The kernel body matches
+// the OpenCL C variant.
+// NOTE(review): this shares its name with the OpenCL C variant; presumably the two
+// are separated by a preprocessor #else that is not present in this copy - confirm.
+std::string generate_wg_broadcast_1D_kernel_code()
+    return "#include <opencl_memory>\n"
+           "#include <opencl_work_item>\n"
+           "#include <opencl_work_group>\n"
+           "using namespace cl;\n"
+           "__kernel void test_wg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+           "{\n"
+           "    ulong tid = get_global_id(0);\n"
+           "    uint result = work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0));\n"
+           "    output[tid] = result;\n"
+           "}\n";
+// Returns the OpenCL C++ source of the 2D broadcast kernel (global_ptr<uint[]>
+// buffers, namespace cl). The broadcasting work-item is selected per dimension as
+// get_group_id(d) % get_local_size(d); buffers are indexed row-major.
+std::string generate_wg_broadcast_2D_kernel_code()
+    return "#include <opencl_memory>\n"
+           "#include <opencl_work_item>\n"
+           "#include <opencl_work_group>\n"
+           "using namespace cl;\n"
+           "__kernel void test_wg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+           "{\n"
+           "    ulong tid_x = get_global_id(0);\n"
+           "    ulong tid_y = get_global_id(1);\n"
+           "    size_t x = get_group_id(0) % get_local_size(0);\n"
+           "    size_t y = get_group_id(1) % get_local_size(1);\n"
+           "    size_t idx = (tid_y * get_global_size(0)) + tid_x;\n"
+           "    uint result = work_group_broadcast(input[idx], x, y);\n"
+           "    output[idx] = result;\n"
+           "}\n";
+// Returns the OpenCL C++ source of the 3D broadcast kernel (global_ptr<uint[]>
+// buffers, namespace cl). The broadcasting work-item is selected per dimension as
+// get_group_id(d) % get_local_size(d); buffers are indexed as
+// idx = tid_z * get_global_size(1) * get_global_size(0) + tid_y * get_global_size(0) + tid_x.
+std::string generate_wg_broadcast_3D_kernel_code()
+    return "#include <opencl_memory>\n"
+           "#include <opencl_work_item>\n"
+           "#include <opencl_work_group>\n"
+           "using namespace cl;\n"
+           "__kernel void test_wg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
+           "{\n"
+           "    ulong tid_x = get_global_id(0);\n"
+           "    ulong tid_y = get_global_id(1);\n"
+           "    ulong tid_z = get_global_id(2);\n"
+           "    size_t x = get_group_id(0) % get_local_size(0);\n"
+           "    size_t y = get_group_id(1) % get_local_size(1);\n"
+           "    size_t z = get_group_id(2) % get_local_size(2);\n"
+           "    ulong idx = (tid_z * get_global_size(1) * get_global_size(0)) + (tid_y * get_global_size(0)) + tid_x;\n"
+           "    uint result = work_group_broadcast(input[idx], x, y, z);\n"
+           "    output[idx] = result;\n"
+           "}\n";
+// Host-side check of the 1D work_group_broadcast kernel output.
+//
+//   in      - values the kernel read, one per work-item
+//   out     - values the kernel wrote, one per work-item
+//   n       - global work size
+//   wg_size - work-group size used for the launch
+//
+// For each work-group the expected value is the input of the work-item whose local
+// id is (group_id % local_size); every output element of that group must equal it.
+// Returns CL_SUCCESS when everything matches, or -1 after logging the first mismatch.
+int
+verify_wg_broadcast_1D(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t n, size_t wg_size)
+{
+    size_t group_id = 0;
+    for (size_t i = 0; i < n; i += wg_size, group_id++)
+    {
+        // Number of work-items in this group (a trailing group may be shorter).
+        // Kept as size_t so the value is not narrowed and the comparisons below
+        // stay unsigned.
+        const size_t local_size = std::min(n - i, wg_size);
+        const cl_uint broadcast_result = in[i + (group_id % local_size)];
+        for (size_t j = 0; j < local_size; j++)
+        {
+            if ( broadcast_result != out[i+j] )
+            {
+                // %lu takes unsigned long; the size_t index is cast explicitly so the
+                // call is well-defined on platforms where the two types differ.
+                log_info("work_group_broadcast: Error at %lu: expected = %u, got = %u\n",
+                         static_cast<unsigned long>(i+j), broadcast_result, out[i+j]);
+                return -1;
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+// Host-side check of the 2D work_group_broadcast kernel output.
+//
+//   in, out              - kernel input / output, row-major (index = y * nx + x)
+//   nx, ny               - global work size in x and y
+//   wg_size_x, wg_size_y - work-group size in x and y
+//
+// For the work-group at (group_id_x, group_id_y) the expected value is the input of
+// the work-item at local position (group_id_x % wg_size_x, group_id_y % wg_size_y);
+// every output element of that group must equal it.
+// Returns CL_SUCCESS when everything matches, or -1 after logging the first mismatch.
+int
+verify_wg_broadcast_2D(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out,
+                       size_t nx, size_t ny,
+                       size_t wg_size_x, size_t wg_size_y)
+{
+    size_t group_id_y = 0;
+    for (size_t i = 0; i < ny; i += wg_size_y, group_id_y++)
+    {
+        const size_t y = group_id_y % wg_size_y;
+        const size_t local_size_y = (ny-i) > wg_size_y ? wg_size_y : (ny-i);
+        for (size_t row = 0; row < local_size_y; row++)
+        {
+            size_t group_id_x = 0;
+            for (size_t j = 0; j < nx; j += wg_size_x, group_id_x++)
+            {
+                const size_t x = group_id_x % wg_size_x;
+                const size_t local_size_x = (nx-j) > wg_size_x ? wg_size_x : (nx-j);
+                const size_t src_indx = (i + y) * nx + (j + x);
+                const cl_uint broadcast_result = in[src_indx];
+                for (size_t col = 0; col < local_size_x; col++)
+                {
+                    const size_t indx = (i + row) * nx + (j + col);
+                    if ( broadcast_result != out[indx] )
+                    {
+                        // %lu takes unsigned long; size_t values are cast explicitly so
+                        // the calls are well-defined where the two types differ.
+                        // The first three lines print, in order: the failing output
+                        // index, the index of the expected source element, and the
+                        // size of the output vector.
+                        log_info("%lu\n", static_cast<unsigned long>(indx));
+                        log_info("%lu\n", static_cast<unsigned long>(src_indx));
+                        log_info("%lu\n", static_cast<unsigned long>(out.size()));
+                        log_info("work_group_broadcast: Error at (%lu, %lu): expected = %u, got = %u\n",
+                                 static_cast<unsigned long>(j+col), static_cast<unsigned long>(i+row),
+                                 broadcast_result, out[indx]);
+                        return -1;
+                    }
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+// Host-side check of the 3D work_group_broadcast kernel output.
+//
+//   in, out        - kernel input / output, flattened as
+//                    index = z * (ny * nx) + y * nx + x (the layout the kernel uses)
+//   nx, ny, nz     - global work size in x, y and z
+//   wg_size_x/y/z  - work-group size in x, y and z
+//
+// For the work-group at (group_id_x, group_id_y, group_id_z) the expected value is
+// the input of the work-item at local position
+// (group_id_x % wg_size_x, group_id_y % wg_size_y, group_id_z % wg_size_z);
+// every output element of that group must equal it.
+// Returns CL_SUCCESS when everything matches, or -1 after logging the first mismatch.
+int
+verify_wg_broadcast_3D(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out,
+                       size_t nx, size_t ny, size_t nz,
+                       size_t wg_size_x, size_t wg_size_y, size_t wg_size_z)
+{
+    size_t group_id_z = 0;
+    for (size_t i = 0; i < nz; i += wg_size_z, group_id_z++)
+    {
+        const size_t z = group_id_z % wg_size_z;
+        const size_t local_size_z = (nz-i) > wg_size_z ? wg_size_z : (nz-i);
+        for (size_t plane = 0; plane < local_size_z; plane++)
+        {
+            size_t group_id_y = 0;
+            for (size_t j = 0; j < ny; j += wg_size_y, group_id_y++)
+            {
+                const size_t y = group_id_y % wg_size_y;
+                const size_t local_size_y = (ny-j) > wg_size_y ? wg_size_y : (ny-j);
+                for (size_t row = 0; row < local_size_y; row++)
+                {
+                    size_t group_id_x = 0;
+                    for (size_t k = 0; k < nx; k += wg_size_x, group_id_x++)
+                    {
+                        const size_t x = group_id_x % wg_size_x;
+                        const size_t local_size_x = (nx-k) > wg_size_x ? wg_size_x : (nx-k);
+                        // The stride of one z-plane is ny * nx, the same stride used
+                        // for the output index below. Using nz here would read the
+                        // wrong element whenever nx != nz.
+                        const cl_uint broadcast_result = in[(i + z) * ny * nx + (j + y) * nx + (k + x)];
+                        for (size_t col = 0; col < local_size_x; col++)
+                        {
+                            const size_t indx = (i + plane) * ny * nx + (j + row) * nx + (k + col);
+                            if ( broadcast_result != out[indx] )
+                            {
+                                // %lu takes unsigned long; size_t coordinates are cast
+                                // explicitly so the call is well-defined where the
+                                // two types differ.
+                                log_info(
+                                    "work_group_broadcast: Error at (%lu, %lu, %lu): expected = %u, got = %u\n",
+                                    static_cast<unsigned long>(k+col),
+                                    static_cast<unsigned long>(j+row),
+                                    static_cast<unsigned long>(i+plane),
+                                    broadcast_result, out[indx]);
+                                return -1;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+// Builds the kernel input: count elements holding a repeating countdown
+// wg_size, wg_size - 1, ..., 1, wg_size, ... (the counter restarts at wg_size
+// each time it reaches 0).
+std::vector<cl_uint> generate_input_wg_broadcast(size_t count, size_t wg_size)
+    std::vector<cl_uint> input(count, cl_uint(0));
+    size_t j = wg_size;
+    for(size_t i = 0; i < count; i++)
+    {
+        input[i] = static_cast<cl_uint>(j);
+        j--;
+        if(j == 0)
+        {
+            j = wg_size;
+        }
+    }
+    return input;
+// Builds the initial host-side output vector: count elements, all set to 1.
+// wg_size is accepted but not used.
+std::vector<cl_uint> generate_output_wg_broadcast(size_t count, size_t wg_size)
+    (void) wg_size;
+    return std::vector<cl_uint>(count, cl_uint(1));
+// Runs the work_group_broadcast test for one dimensionality (dim = 1, 2 or 3).
+// Steps: pick the kernel source for dim, create the program and kernel, choose a
+// local size that fits CL_KERNEL_WORK_GROUP_SIZE, derive the global size from
+// count, run the kernel on generated input, read the output back and check it
+// with verify_wg_broadcast_1D / _2D / _3D.
+// Returns err on the normal path. RETURN_ON_CL_ERROR and RETURN_ON_ERROR_MSG are
+// defined elsewhere; presumably they return early on failure - confirm.
+int work_group_broadcast(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, size_t dim)
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t flat_wg_size;
+    size_t wg_size[] = { 1, 1, 1};
+    size_t work_size[] = { 1, 1, 1};
+    int err;
+    // Get kernel source code
+    std::string code_str;
+    if(dim > 2) code_str = generate_wg_broadcast_3D_kernel_code();
+    else if(dim > 1) code_str = generate_wg_broadcast_2D_kernel_code();
+    else code_str = generate_wg_broadcast_1D_kernel_code();
+    // NOTE(review): only the #elif of the conditional block below is present in
+    // this copy; its opening and closing directives appear to be missing, so the
+    // three create_opencl_kernel calls are presumably alternative build paths -
+    // confirm against the original file.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast");
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast", "-cl-std=CL2.0", false);
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast");
+    // Get max flat workgroup size
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &flat_wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+    // Set local work size
+    wg_size[0] = flat_wg_size;
+    if(dim > 2)
+    {
+        // Largest cube among 8x8x8, 4x4x4 and 2x2x2 whose work-item count
+        // (512, 64, 8) does not exceed flat_wg_size; otherwise 1x1x1.
+        if (flat_wg_size >=512)
+        {
+            wg_size[0] = wg_size[1] = wg_size[2] = 8;
+        }
+        else if (flat_wg_size >= 64)
+        {
+            wg_size[0] = wg_size[1] = wg_size[2] = 4;
+        }
+        else if (flat_wg_size >= 8)
+        {
+            wg_size[0] = wg_size[1] = wg_size[2] = 2;
+        }
+        else
+        {
+            wg_size[0] = wg_size[1] = wg_size[2] = 1;
+        }
+    }
+    else if(dim > 1)
+    {
+        // Largest square among 16x16, 8x8 and 4x4 whose work-item count
+        // (256, 64, 16) does not exceed flat_wg_size; otherwise 1x1.
+        if (flat_wg_size >= 256)
+        {
+            wg_size[0] = wg_size[1] = 16;
+        }
+        else if (flat_wg_size >=64)
+        {
+            wg_size[0] = wg_size[1] = 8;
+        }
+        else if (flat_wg_size >= 16)
+        {
+            wg_size[0] = wg_size[1] = 4;
+        }
+        else
+        {
+            wg_size[0] = wg_size[1] = 1;
+        }
+    }
+    // Calculate flat local work size
+    flat_wg_size = wg_size[0];
+    if(dim > 1) flat_wg_size *= wg_size[1];
+    if(dim > 2) flat_wg_size *= wg_size[2];
+    // Calculate global work size
+    size_t flat_work_size = count;
+    // 3D
+    if(dim > 2)
+    {
+        // count / 3 is an integer division performed before the cast to double.
+        // The same wg_number is used for every dimension, so the launch has
+        // wg_number work-groups along each of the three axes.
+        size_t wg_number = static_cast<size_t>(
+            std::ceil(static_cast<double>(count / 3) / (wg_size[0] * wg_size[1] * wg_size[2]))
+        );
+        work_size[0] = wg_number * wg_size[0];
+        work_size[1] = wg_number * wg_size[1];
+        work_size[2] = wg_number * wg_size[2];
+        flat_work_size = work_size[0] * work_size[1] * work_size[2];
+    }
+    // 2D
+    else if(dim > 1)
+    {
+        // count / 2 is an integer division performed before the cast to double.
+        size_t wg_number = static_cast<size_t>(
+            std::ceil(static_cast<double>(count / 2) / (wg_size[0] * wg_size[1]))
+        );
+        work_size[0] = wg_number * wg_size[0];
+        work_size[1] = wg_number * wg_size[1];
+        flat_work_size = work_size[0] * work_size[1];
+    }
+    // 1D
+    else
+    {
+        // Round count up to a whole number of work-groups.
+        size_t wg_number = static_cast<size_t>(
+            std::ceil(static_cast<double>(count) / wg_size[0])
+        );
+        flat_work_size = wg_number * wg_size[0];
+        work_size[0] = flat_work_size;
+    }
+    std::vector<cl_uint> input = generate_input_wg_broadcast(flat_work_size, flat_wg_size);
+    std::vector<cl_uint> output = generate_output_wg_broadcast(flat_work_size, flat_wg_size);
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    // Blocking write (CL_TRUE) of the generated input into buffers[0].
+    // NOTE(review): the host-pointer argument on the static_cast line looks
+    // truncated in this copy (presumably the input vector's storage) - confirm.
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+    // The two clSetKernelArg results are OR-ed together and checked once.
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+    err = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+    // Blocking read (CL_TRUE) of buffers[1] back to the host.
+    // NOTE(review): the host-pointer argument on the static_cast line looks
+    // truncated in this copy (presumably the output vector's storage) - confirm.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    int result = CL_SUCCESS;
+    // 3D
+    if(dim > 2)
+    {
+        result = verify_wg_broadcast_3D(
+            input, output,
+            work_size[0], work_size[1], work_size[2],
+            wg_size[0], wg_size[1], wg_size[2]
+        );
+    }
+    // 2D
+    else if(dim > 1)
+    {
+        result = verify_wg_broadcast_2D(
+            input, output,
+            work_size[0], work_size[1],
+            wg_size[0], wg_size[1]
+        );
+    }
+    // 1D
+    else
+    {
+        result = verify_wg_broadcast_1D(
+            input, output,
+            work_size[0],
+            wg_size[0]
+        );
+    }
+    RETURN_ON_ERROR_MSG(result, "work_group_broadcast_%luD failed", dim);
+    log_info("work_group_broadcast_%luD passed\n", dim);
+    // NOTE(review): if the RETURN_ON_* macros above return early, the objects
+    // released here are not released on those paths - confirm against the macro
+    // definitions.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    // Test-case body: runs work_group_broadcast() for dim = 1, 2 and 3, OR-ing the
+    // individual results into error; returns -1 if any run reported a non-zero
+    // result, CL_SUCCESS otherwise.
+    // NOTE(review): the line that names this test case is not present in this copy
+    // of the patch - confirm against the original file.
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+    local_error = work_group_broadcast(device, context, queue, n_elems, 1);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_broadcast(device, context, queue, n_elems, 2);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_broadcast(device, context, queue, n_elems, 3);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    if(error != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
diff --git a/test_conformance/clcpp/workgroups/test_wg_reduce.hpp b/test_conformance/clcpp/workgroups/test_wg_reduce.hpp
new file mode 100644
index 0000000..616cbdb
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/test_wg_reduce.hpp
@@ -0,0 +1,331 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <limits>
+#include <algorithm>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+// Returns the OpenCL C source of the reduce kernel (the variant placed under
+// DEVELOPMENT && USE_OPENCLC_KERNELS). The element type name comes from
+// type_name<CL_INT_TYPE>() and the called built-in is
+// work_group_reduce_<to_string(op)>; each work-item stores the result in output[tid].
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_reduce_kernel_code()
+    return
+        "__kernel void test_wg_reduce(global " + type_name<CL_INT_TYPE>() + " *input, global " + type_name<CL_INT_TYPE>() + " *output)\n"
+        "{\n"
+        "    ulong tid = get_global_id(0);\n"
+        "\n"
+        "    " + type_name<CL_INT_TYPE>() + " result = work_group_reduce_" + to_string(op) + "(input[tid]);\n"
+        "    output[tid] = result;\n"
+        "}\n";
+// Returns the OpenCL C++ source of the reduce kernel: buffers are
+// global_ptr<T[]> with T = type_name<CL_INT_TYPE>(), and the reduction is called as
+// work_group_reduce<work_group_op::<to_string(op)>>(input[tid]).
+// NOTE(review): this shares its name with the OpenCL C variant; presumably the two
+// are separated by a preprocessor #else that is not present in this copy - confirm.
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_reduce_kernel_code()
+    return "#include <opencl_memory>\n"
+           "#include <opencl_work_item>\n"
+           "#include <opencl_work_group>\n"
+           "using namespace cl;\n"
+           "__kernel void test_wg_reduce(global_ptr<" + type_name<CL_INT_TYPE>() + "[]> input, "
+                                        "global_ptr<" + type_name<CL_INT_TYPE>() + "[]> output)\n"
+           "{\n"
+           "    ulong tid = get_global_id(0);\n"
+           "    " + type_name<CL_INT_TYPE>() + " result = work_group_reduce<work_group_op::" + to_string(op) + ">(input[tid]);\n"
+           "    output[tid] = result;\n"
+           "}\n";
+// Host-side check for the add reduction. The input is processed in consecutive
+// chunks of wg_size elements (the last chunk may be shorter); every output element
+// of a chunk must equal the sum of that chunk's inputs.
+// Returns 0 when everything matches, or -1 after logging the first mismatch.
+template <class CL_INT_TYPE>
+int verify_wg_reduce_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+    size_t i, j;
+    for (i = 0; i < in.size(); i += wg_size)
+    {
+        CL_INT_TYPE sum = 0;
+        // Work-group sum
+        for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++)
+            sum += in[i + j];
+        // Check if all work-items in work-group stored correct value
+        for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++)
+        {
+            if (sum != out[i + j])
+            {
+                // Values are converted to size_t for logging, so negative values of
+                // signed element types are printed as large unsigned numbers.
+                log_info(
+                    "work_group_reduce_add %s: Error at %lu: expected = %lu, got = %lu\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    i + j,
+                    static_cast<size_t>(sum),
+                    static_cast<size_t>(out[i + j]));
+                return -1;
+            }
+        }
+    }
+    return 0;
+// Host-side check for the min reduction. The input is processed in consecutive
+// chunks of wg_size elements (the last chunk may be shorter); every output element
+// of a chunk must equal the minimum of that chunk's inputs.
+// Returns 0 when everything matches, or -1 after logging the first mismatch.
+template <class CL_INT_TYPE>
+int verify_wg_reduce_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+    size_t i, j;
+    for (i = 0; i < in.size(); i += wg_size)
+    {
+        // Start from the largest representable value. The name is parenthesized so
+        // that a function-like macro called max cannot expand here.
+        CL_INT_TYPE min = (std::numeric_limits<CL_INT_TYPE>::max)();
+        // Work-group min
+        for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++)
+            min = std::min<CL_INT_TYPE>(min, in[i + j]);
+        // Check if all work-items in work-group stored correct value
+        for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++)
+        {
+            if (min != out[i + j])
+            {
+                // Values are converted to size_t for logging, so negative values of
+                // signed element types are printed as large unsigned numbers.
+                log_info(
+                    "work_group_reduce_min %s: Error at %lu: expected = %lu, got = %lu\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    i + j,
+                    static_cast<size_t>(min),
+                    static_cast<size_t>(out[i + j]));
+                return -1;
+            }
+        }
+    }
+    return 0;
+// Host-side check for the max reduction. The input is processed in consecutive
+// chunks of wg_size elements (the last chunk may be shorter); every output element
+// of a chunk must equal the maximum of that chunk's inputs.
+// Returns 0 when everything matches, or -1 after logging the first mismatch.
+template <class CL_INT_TYPE>
+int verify_wg_reduce_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+    size_t i, j;
+    for (i = 0; i < in.size(); i += wg_size)
+    {
+        // Start from the smallest representable value. The name is parenthesized so
+        // that a function-like macro called min cannot expand here.
+        CL_INT_TYPE max = (std::numeric_limits<CL_INT_TYPE>::min)();
+        // Work-group max
+        for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++)
+            max = std::max<CL_INT_TYPE>(max, in[i + j]);
+        // Check if all work-items in work-group stored correct value
+        for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++)
+        {
+            if (max != out[i + j])
+            {
+                // Values are converted to size_t for logging, so negative values of
+                // signed element types are printed as large unsigned numbers.
+                log_info(
+                    "work_group_reduce_max %s: Error at %lu: expected = %lu, got = %lu\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    i + j,
+                    static_cast<size_t>(max),
+                    static_cast<size_t>(out[i + j]));
+                return -1;
+            }
+        }
+    }
+    return 0;
+// Dispatches on the template parameter op to verify_wg_reduce_add / _min / _max
+// and returns that function's result. Returns -1 when op matches none of the
+// cases listed in the switch.
+template <class CL_INT_TYPE, work_group_op op>
+int verify_wg_reduce(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+    switch (op)
+    {
+        case work_group_op::add:
+            return verify_wg_reduce_add(in, out, wg_size);
+        case work_group_op::min:
+            return verify_wg_reduce_min(in, out, wg_size);
+        case work_group_op::max:
+            return verify_wg_reduce_max(in, out, wg_size);
+    }
+    return -1;
+// Runs the work_group_reduce test for one element type and one operation.
+// Steps: skip (returning CL_SUCCESS) when type_supported<CL_INT_TYPE>(device) is
+// false, create the program and kernel, use CL_KERNEL_WORK_GROUP_SIZE as the
+// local size, round count up to a whole number of work-groups, run the kernel on
+// generated input, read the output back and check it with verify_wg_reduce.
+// Returns err on the normal path. RETURN_ON_CL_ERROR and RETURN_ON_ERROR_MSG are
+// defined elsewhere; presumably they return early on failure - confirm.
+template <class CL_INT_TYPE, work_group_op op>
+int work_group_reduce(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+    // don't run test for unsupported types
+    if(!type_supported<CL_INT_TYPE>(device))
+    {
+        return CL_SUCCESS;
+    }
+    cl_mem buffers[2];
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size;
+    size_t work_size[1];
+    int err;
+    std::string code_str = generate_wg_reduce_kernel_code<CL_INT_TYPE, op>();
+    // NOTE(review): only the #elif of the conditional block below is present in
+    // this copy; its opening and closing directives appear to be missing, so the
+    // three create_opencl_kernel calls are presumably alternative build paths -
+    // confirm against the original file.
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce");
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce", "-cl-std=CL2.0", false);
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce");
+    // The kernel's maximum work-group size is used directly as the local size.
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+    // Calculate global work size
+    size_t flat_work_size;
+    // Round count up to a whole number of work-groups.
+    size_t wg_number = static_cast<size_t>(
+        std::ceil(static_cast<double>(count) / wg_size)
+    );
+    flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    // Blocking write (CL_TRUE) of the generated input into buffers[0].
+    // NOTE(review): the host-pointer argument on the static_cast line looks
+    // truncated in this copy (presumably the input vector's storage) - confirm.
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(),
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+    // The two clSetKernelArg results are OR-ed together and checked once.
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL);
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+    // Blocking read (CL_TRUE) of buffers[1] back to the host.
+    // NOTE(review): the host-pointer argument on the static_cast line looks
+    // truncated in this copy (presumably the output vector's storage) - confirm.
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(),
+        static_cast<void *>(, 0, NULL, NULL
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    if (verify_wg_reduce<CL_INT_TYPE, op>(input, output, wg_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "work_group_reduce_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    }
+    log_info("work_group_reduce_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    // NOTE(review): if the RETURN_ON_* macros above return early, the objects
+    // released here are not released on those paths - confirm against the macro
+    // definitions.
+    clReleaseMemObject(buffers[0]);
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    // Test-case body: runs work_group_reduce with work_group_op::add for cl_int,
+    // cl_uint, cl_long and cl_ulong, OR-ing the results into error; returns -1 if
+    // any run reported a non-zero result, CL_SUCCESS otherwise.
+    // NOTE(review): the line that names this test case is not present in this copy
+    // of the patch - confirm against the original file.
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+    local_error = work_group_reduce<cl_int, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_reduce<cl_uint, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_reduce<cl_long, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_reduce<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    if(error != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    // Test-case body: runs work_group_reduce with work_group_op::min for cl_int,
+    // cl_uint, cl_long and cl_ulong, OR-ing the results into error; returns -1 if
+    // any run reported a non-zero result, CL_SUCCESS otherwise.
+    // NOTE(review): the line that names this test case is not present in this copy
+    // of the patch - confirm against the original file.
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+    local_error = work_group_reduce<cl_int, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_reduce<cl_uint, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_reduce<cl_long, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_reduce<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    if(error != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+    // Test-case body: runs work_group_reduce with work_group_op::max for cl_int,
+    // cl_uint, cl_long and cl_ulong, OR-ing the results into error; returns -1 if
+    // any run reported a non-zero result, CL_SUCCESS otherwise.
+    // NOTE(review): the line that names this test case is not present in this copy
+    // of the patch - confirm against the original file.
+    int error = CL_SUCCESS;
+    int local_error = CL_SUCCESS;
+    local_error = work_group_reduce<cl_int, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_reduce<cl_uint, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_reduce<cl_long, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_reduce<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    if(error != CL_SUCCESS)
+        return -1;
+    return CL_SUCCESS;
diff --git a/test_conformance/clcpp/workgroups/test_wg_scan_exclusive.hpp b/test_conformance/clcpp/workgroups/test_wg_scan_exclusive.hpp
new file mode 100644
index 0000000..35ec4b1
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/test_wg_scan_exclusive.hpp
@@ -0,0 +1,324 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <algorithm>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) // selects the OpenCL C generator; NOTE(review): no #else/#endif is visible between the two definitions below — confirm against upstream
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_scan_exclusive_kernel_code() // Returns OpenCL C source for kernel "test_wg_scan_exclusive"; the element type and operation name are spliced in via type_name<CL_INT_TYPE>() and to_string(op)
+    return
+        "__kernel void test_wg_scan_exclusive(global " + type_name<CL_INT_TYPE>() + " *input, global " + type_name<CL_INT_TYPE>() + " *output)\n"
+        "{\n"
+        "    ulong tid = get_global_id(0);\n"
+        "\n"
+        "    " + type_name<CL_INT_TYPE>() + " result = work_group_scan_exclusive_" + to_string(op) + "(input[tid]);\n" // built-in name is formed as work_group_scan_exclusive_<op>
+        "    output[tid] = result;\n" // each work-item stores its own scan result at its global id
+        "}\n";
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_scan_exclusive_kernel_code() // Returns OpenCL C++ source for the same kernel name, taking global_ptr<T[]> arguments
+    return "#include <opencl_memory>\n"
+           "#include <opencl_work_item>\n"
+           "#include <opencl_work_group>\n"
+           "using namespace cl;\n"
+           "__kernel void test_wg_scan_exclusive(global_ptr<" + type_name<CL_INT_TYPE>() + "[]> input, "
+                                                "global_ptr<" + type_name<CL_INT_TYPE>() + "[]> output)\n"
+           "{\n"
+           "    ulong tid = get_global_id(0);\n"
+           "    " + type_name<CL_INT_TYPE>() + " result = work_group_scan_exclusive<work_group_op::" + to_string(op) + ">(input[tid]);\n" // operation is passed as a template argument here
+           "    output[tid] = result;\n"
+           "}\n";
+// Verifies the result of an exclusive add-scan, one work-group at a time.
+//
+// `in` and `out` are walked in consecutive chunks of `wg_size` elements (the
+// last chunk may be shorter). Within a chunk, out[k] must equal the sum of the
+// in[] elements that precede position k in that chunk, so the first element of
+// every chunk must be 0.
+//
+// Returns CL_SUCCESS when every element matches, or -1 after logging the first
+// mismatch.
+template <class CL_INT_TYPE>
+int verify_wg_scan_exclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // Number of elements in this work-group; the last one may be partial.
+        const size_t items = (std::min)(wg_size, in.size() - i);
+        CL_INT_TYPE sum = 0;
+        // Check if all work-items in work-group wrote correct value
+        for (size_t j = 0; j < items; j++)
+        {
+            if (sum != out[i + j])
+            {
+                // %zu matches size_t on every platform; the values go through
+                // std::to_string so signed and 64-bit results print correctly.
+                log_info(
+                    "work_group_scan_exclusive_add %s: Error at %zu: expected = %s, got = %s\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    i + j,
+                    std::to_string(sum).c_str(),
+                    std::to_string(out[i + j]).c_str());
+                return -1;
+            }
+            // Exclusive scan: the current input only affects later elements.
+            sum += in[i + j];
+        }
+    }
+    return CL_SUCCESS;
+}
+// Verifies the result of an exclusive min-scan, one work-group at a time.
+//
+// `in` and `out` are walked in consecutive chunks of `wg_size` elements (the
+// last chunk may be shorter). Within a chunk, out[k] must equal the minimum of
+// the in[] elements that precede position k in that chunk; the first element
+// of every chunk must be the largest value representable in CL_INT_TYPE.
+//
+// Returns CL_SUCCESS when every element matches, or -1 after logging the first
+// mismatch.
+template <class CL_INT_TYPE>
+int verify_wg_scan_exclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // Number of elements in this work-group; the last one may be partial.
+        const size_t items = (std::min)(wg_size, in.size() - i);
+        CL_INT_TYPE min = (std::numeric_limits<CL_INT_TYPE>::max)();
+        // Check if all work-items in work-group wrote correct value
+        for (size_t j = 0; j < items; j++)
+        {
+            if (min != out[i + j])
+            {
+                // %zu matches size_t on every platform; the values go through
+                // std::to_string so signed and 64-bit results print correctly.
+                log_info(
+                    "work_group_scan_exclusive_min %s: Error at %zu: expected = %s, got = %s\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    i + j,
+                    std::to_string(min).c_str(),
+                    std::to_string(out[i + j]).c_str());
+                return -1;
+            }
+            // Exclusive scan: the current input only affects later elements.
+            min = (std::min)(min, in[i + j]);
+        }
+    }
+    return CL_SUCCESS;
+}
+// Verifies the result of an exclusive max-scan, one work-group at a time.
+//
+// `in` and `out` are walked in consecutive chunks of `wg_size` elements (the
+// last chunk may be shorter). Within a chunk, out[k] must equal the maximum of
+// the in[] elements that precede position k in that chunk; the first element
+// of every chunk must be the smallest value representable in CL_INT_TYPE.
+//
+// Returns CL_SUCCESS when every element matches, or -1 after logging the first
+// mismatch.
+template <class CL_INT_TYPE>
+int verify_wg_scan_exclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // Number of elements in this work-group; the last one may be partial.
+        const size_t items = (std::min)(wg_size, in.size() - i);
+        CL_INT_TYPE max = (std::numeric_limits<CL_INT_TYPE>::min)();
+        // Check if all work-items in work-group wrote correct value
+        for (size_t j = 0; j < items; j++)
+        {
+            if (max != out[i + j])
+            {
+                // %zu matches size_t on every platform; the values go through
+                // std::to_string so signed and 64-bit results print correctly.
+                log_info(
+                    "work_group_scan_exclusive_max %s: Error at %zu: expected = %s, got = %s\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    i + j,
+                    std::to_string(max).c_str(),
+                    std::to_string(out[i + j]).c_str());
+                return -1;
+            }
+            // Exclusive scan: the current input only affects later elements.
+            max = (std::max)(max, in[i + j]);
+        }
+    }
+    return CL_SUCCESS;
+}
+template <class CL_INT_TYPE, work_group_op op>
+int verify_wg_scan_exclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size) // Dispatches to the exclusive-scan verifier that matches the compile-time `op`; arguments are forwarded unchanged and the verifier's result is returned
+    switch (op)
+    {
+        case work_group_op::add:
+            return verify_wg_scan_exclusive_add(in, out, wg_size);
+        case work_group_op::min:
+            return verify_wg_scan_exclusive_min(in, out, wg_size);
+        case work_group_op::max:
+            return verify_wg_scan_exclusive_max(in, out, wg_size);
+    }
+    return -1; // reached only when `op` matches none of the cases above
+template <class CL_INT_TYPE, work_group_op op>
+int work_group_scan_exclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) // Builds the exclusive-scan kernel for CL_INT_TYPE/op, runs it over at least `count` work-items and verifies the output on the host
+    // don't run test for unsupported types (the skip is reported as CL_SUCCESS)
+    if(!type_supported<CL_INT_TYPE>(device))
+    {
+        return CL_SUCCESS;
+    }
+    cl_mem buffers[2]; // [0] = kernel input, [1] = kernel output (see clSetKernelArg below)
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size; // filled by clGetKernelWorkGroupInfo and used as the local work size
+    size_t work_size[1]; // 1-D global work size
+    int err;
+    std::string code_str = generate_wg_scan_exclusive_kernel_code<CL_INT_TYPE, op>();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+// NOTE(review): the three create_opencl_kernel calls below look like alternative
+// preprocessor branches, but only the #elif line is present in this text (no
+// opening #if, #else or #endif), and `err` is not checked after them — confirm
+// against upstream.
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_exclusive");
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_exclusive", "-cl-std=CL2.0", false);
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_exclusive");
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+    // Calculate global work size: `count` rounded up to a whole number of work-groups
+    size_t flat_work_size;
+    size_t wg_number = static_cast<size_t>(
+        std::ceil(static_cast<double>(count) / wg_size)
+    );
+    flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size); // generate_input/generate_output are defined outside this view
+    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(), // CL_TRUE: blocking write of the whole input vector
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the host-pointer expression inside static_cast<void *>( ) is missing from this line — confirm against upstream
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); // both statuses are OR-ed and checked together
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); // 1-D launch: global = work_size, local = wg_size
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(), // CL_TRUE: blocking read of the whole output buffer
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the host-pointer expression inside static_cast<void *>( ) is missing from this line — confirm against upstream
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    if (verify_wg_scan_exclusive<CL_INT_TYPE, op>(input, output, wg_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "work_group_scan_exclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    }
+    log_info("work_group_scan_exclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    clReleaseMemObject(buffers[0]); // NOTE(review): the releases sit only at the end; if the RETURN_ON_* macros return early (presumably — defined elsewhere) these objects are not released on failure
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // Test-case body: runs work_group_scan_exclusive with work_group_op::add for cl_int, cl_uint, cl_long and cl_ulong. NOTE(review): the line naming this test case is not present above the parameter list — confirm against upstream.
+    int error = CL_SUCCESS; // OR-accumulated status of all four runs
+    int local_error = CL_SUCCESS; // status of the most recent run
+    local_error = work_group_scan_exclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error) // CHECK_ERROR is defined outside this view; presumably it reports a non-success status — verify
+    error |= local_error;
+    local_error = work_group_scan_exclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_scan_exclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_scan_exclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    if(error != CL_SUCCESS) // any non-zero accumulated status is collapsed to -1
+        return -1;
+    return CL_SUCCESS;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // Test-case body: runs work_group_scan_exclusive with work_group_op::min for cl_int, cl_uint, cl_long and cl_ulong. NOTE(review): the line naming this test case is not present above the parameter list — confirm against upstream.
+    int error = CL_SUCCESS; // OR-accumulated status of all four runs
+    int local_error = CL_SUCCESS; // status of the most recent run
+    local_error = work_group_scan_exclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error) // CHECK_ERROR is defined outside this view; presumably it reports a non-success status — verify
+    error |= local_error;
+    local_error = work_group_scan_exclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_scan_exclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_scan_exclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    if(error != CL_SUCCESS) // any non-zero accumulated status is collapsed to -1
+        return -1;
+    return CL_SUCCESS;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // Test-case body: runs work_group_scan_exclusive with work_group_op::max for cl_int, cl_uint, cl_long and cl_ulong. NOTE(review): the line naming this test case is not present above the parameter list — confirm against upstream.
+    int error = CL_SUCCESS; // OR-accumulated status of all four runs
+    int local_error = CL_SUCCESS; // status of the most recent run
+    local_error = work_group_scan_exclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error) // CHECK_ERROR is defined outside this view; presumably it reports a non-success status — verify
+    error |= local_error;
+    local_error = work_group_scan_exclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_scan_exclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_scan_exclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    if(error != CL_SUCCESS) // any non-zero accumulated status is collapsed to -1
+        return -1;
+    return CL_SUCCESS;
diff --git a/test_conformance/clcpp/workgroups/test_wg_scan_inclusive.hpp b/test_conformance/clcpp/workgroups/test_wg_scan_inclusive.hpp
new file mode 100644
index 0000000..34096eb
--- /dev/null
+++ b/test_conformance/clcpp/workgroups/test_wg_scan_inclusive.hpp
@@ -0,0 +1,324 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <algorithm>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of work-group functions
+#include "common.hpp"
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) // selects the OpenCL C generator; NOTE(review): no #else/#endif is visible between the two definitions below — confirm against upstream
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_scan_inclusive_kernel_code() // Returns OpenCL C source for kernel "test_wg_scan_inclusive"; the element type and operation name are spliced in via type_name<CL_INT_TYPE>() and to_string(op)
+    return
+        "__kernel void test_wg_scan_inclusive(global " + type_name<CL_INT_TYPE>() + " *input, global " + type_name<CL_INT_TYPE>() + " *output)\n"
+        "{\n"
+        "    ulong tid = get_global_id(0);\n"
+        "\n"
+        "    " + type_name<CL_INT_TYPE>() + " result = work_group_scan_inclusive_" + to_string(op) + "(input[tid]);\n" // built-in name is formed as work_group_scan_inclusive_<op>
+        "    output[tid] = result;\n" // each work-item stores its own scan result at its global id
+        "}\n";
+template <class CL_INT_TYPE, work_group_op op>
+std::string generate_wg_scan_inclusive_kernel_code() // Returns OpenCL C++ source for the same kernel name, taking global_ptr<T[]> arguments
+    return "#include <opencl_memory>\n"
+           "#include <opencl_work_item>\n"
+           "#include <opencl_work_group>\n"
+           "using namespace cl;\n"
+           "__kernel void test_wg_scan_inclusive(global_ptr<" + type_name<CL_INT_TYPE>() + "[]> input, "
+                                                "global_ptr<" + type_name<CL_INT_TYPE>() + "[]> output)\n"
+           "{\n"
+           "    ulong tid = get_global_id(0);\n"
+           "    " + type_name<CL_INT_TYPE>() + " result = work_group_scan_inclusive<work_group_op::" + to_string(op) + ">(input[tid]);\n" // operation is passed as a template argument here
+           "    output[tid] = result;\n"
+           "}\n";
+// Verifies the result of an inclusive add-scan, one work-group at a time.
+//
+// `in` and `out` are walked in consecutive chunks of `wg_size` elements (the
+// last chunk may be shorter). Within a chunk, out[k] must equal the sum of the
+// in[] elements of that chunk up to and including position k.
+//
+// Returns CL_SUCCESS when every element matches, or -1 after logging the first
+// mismatch.
+template <class CL_INT_TYPE>
+int verify_wg_scan_inclusive_add(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // Number of elements in this work-group; the last one may be partial.
+        const size_t items = (std::min)(wg_size, in.size() - i);
+        CL_INT_TYPE sum = 0;
+        // Check if all work-items in work-group wrote correct value
+        for (size_t j = 0; j < items; j++)
+        {
+            // Inclusive scan: the current input is part of its own result.
+            sum += in[i + j];
+            if (sum != out[i + j])
+            {
+                // %zu matches size_t on every platform; the values go through
+                // std::to_string so signed and 64-bit results print correctly.
+                log_info(
+                    "work_group_scan_inclusive_add %s: Error at %zu: expected = %s, got = %s\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    i + j,
+                    std::to_string(sum).c_str(),
+                    std::to_string(out[i + j]).c_str());
+                return -1;
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+// Verifies the result of an inclusive min-scan, one work-group at a time.
+//
+// `in` and `out` are walked in consecutive chunks of `wg_size` elements (the
+// last chunk may be shorter). Within a chunk, out[k] must equal the minimum of
+// the in[] elements of that chunk up to and including position k.
+//
+// Returns CL_SUCCESS when every element matches, or -1 after logging the first
+// mismatch.
+template <class CL_INT_TYPE>
+int verify_wg_scan_inclusive_min(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // Number of elements in this work-group; the last one may be partial.
+        const size_t items = (std::min)(wg_size, in.size() - i);
+        CL_INT_TYPE min = (std::numeric_limits<CL_INT_TYPE>::max)();
+        // Check if all work-items in work-group wrote correct value
+        for (size_t j = 0; j < items; j++)
+        {
+            // Inclusive scan: the current input is part of its own result.
+            min = (std::min)(min, in[i + j]);
+            if (min != out[i + j])
+            {
+                // %zu matches size_t on every platform; the values go through
+                // std::to_string so signed and 64-bit results print correctly.
+                log_info(
+                    "work_group_scan_inclusive_min %s: Error at %zu: expected = %s, got = %s\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    i + j,
+                    std::to_string(min).c_str(),
+                    std::to_string(out[i + j]).c_str());
+                return -1;
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+// Verifies the result of an inclusive max-scan, one work-group at a time.
+//
+// `in` and `out` are walked in consecutive chunks of `wg_size` elements (the
+// last chunk may be shorter). Within a chunk, out[k] must equal the maximum of
+// the in[] elements of that chunk up to and including position k.
+//
+// Returns CL_SUCCESS when every element matches, or -1 after logging the first
+// mismatch.
+template <class CL_INT_TYPE>
+int verify_wg_scan_inclusive_max(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size)
+{
+    for (size_t i = 0; i < in.size(); i += wg_size)
+    {
+        // Number of elements in this work-group; the last one may be partial.
+        const size_t items = (std::min)(wg_size, in.size() - i);
+        CL_INT_TYPE max = (std::numeric_limits<CL_INT_TYPE>::min)();
+        // Check if all work-items in work-group wrote correct value
+        for (size_t j = 0; j < items; j++)
+        {
+            // Inclusive scan: the current input is part of its own result.
+            max = (std::max)(max, in[i + j]);
+            if (max != out[i + j])
+            {
+                // %zu matches size_t on every platform; the values go through
+                // std::to_string so signed and 64-bit results print correctly.
+                log_info(
+                    "work_group_scan_inclusive_max %s: Error at %zu: expected = %s, got = %s\n",
+                    type_name<CL_INT_TYPE>().c_str(),
+                    i + j,
+                    std::to_string(max).c_str(),
+                    std::to_string(out[i + j]).c_str());
+                return -1;
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+template <class CL_INT_TYPE, work_group_op op>
+int verify_wg_scan_inclusive(const std::vector<CL_INT_TYPE> &in, const std::vector<CL_INT_TYPE> &out, size_t wg_size) // Dispatches to the inclusive-scan verifier that matches the compile-time `op`; arguments are forwarded unchanged and the verifier's result is returned
+    switch (op)
+    {
+        case work_group_op::add:
+            return verify_wg_scan_inclusive_add(in, out, wg_size);
+        case work_group_op::min:
+            return verify_wg_scan_inclusive_min(in, out, wg_size);
+        case work_group_op::max:
+            return verify_wg_scan_inclusive_max(in, out, wg_size);
+    }
+    return -1; // reached only when `op` matches none of the cases above
+template <class CL_INT_TYPE, work_group_op op>
+int work_group_scan_inclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) // Builds the inclusive-scan kernel for CL_INT_TYPE/op, runs it over at least `count` work-items and verifies the output on the host
+    // don't run test for unsupported types (the skip is reported as CL_SUCCESS)
+    if(!type_supported<CL_INT_TYPE>(device))
+    {
+        return CL_SUCCESS;
+    }
+    cl_mem buffers[2]; // [0] = kernel input, [1] = kernel output (see clSetKernelArg below)
+    cl_program program;
+    cl_kernel kernel;
+    size_t wg_size; // filled by clGetKernelWorkGroupInfo and used as the local work size
+    size_t work_size[1]; // 1-D global work size
+    int err;
+    std::string code_str = generate_wg_scan_inclusive_kernel_code<CL_INT_TYPE, op>();
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+// NOTE(review): the three create_opencl_kernel calls below look like alternative
+// preprocessor branches, but only the #elif line is present in this text (no
+// opening #if, #else or #endif), and `err` is not checked after them — confirm
+// against upstream.
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive");
+    return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive", "-cl-std=CL2.0", false);
+    err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive");
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+    RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+    // Calculate global work size: `count` rounded up to a whole number of work-groups
+    size_t flat_work_size;
+    size_t wg_number = static_cast<size_t>(
+        std::ceil(static_cast<double>(count) / wg_size)
+    );
+    flat_work_size = wg_number * wg_size;
+    work_size[0] = flat_work_size;
+    std::vector<CL_INT_TYPE> input = generate_input<CL_INT_TYPE, op>(flat_work_size, wg_size); // generate_input/generate_output are defined outside this view
+    std::vector<CL_INT_TYPE> output = generate_output<CL_INT_TYPE, op>(flat_work_size, wg_size);
+    buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err);
+    RETURN_ON_CL_ERROR(err, "clCreateBuffer");
+    err = clEnqueueWriteBuffer(
+        queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(), // CL_TRUE: blocking write of the whole input vector
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the host-pointer expression inside static_cast<void *>( ) is missing from this line — confirm against upstream
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
+    err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); // both statuses are OR-ed and checked together
+    RETURN_ON_CL_ERROR(err, "clSetKernelArg");
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); // 1-D launch: global = work_size, local = wg_size
+    RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
+    err = clEnqueueReadBuffer(
+        queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(), // CL_TRUE: blocking read of the whole output buffer
+        static_cast<void *>(, 0, NULL, NULL // NOTE(review): the host-pointer expression inside static_cast<void *>( ) is missing from this line — confirm against upstream
+    );
+    RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
+    if (verify_wg_scan_inclusive<CL_INT_TYPE, op>(input, output, wg_size) != CL_SUCCESS)
+    {
+        RETURN_ON_ERROR_MSG(-1, "work_group_scan_inclusive_%s %s failed", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    }
+    log_info("work_group_scan_inclusive_%s %s passed\n", to_string(op).c_str(), type_name<CL_INT_TYPE>().c_str());
+    clReleaseMemObject(buffers[0]); // NOTE(review): the releases sit only at the end; if the RETURN_ON_* macros return early (presumably — defined elsewhere) these objects are not released on failure
+    clReleaseMemObject(buffers[1]);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return err;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // Test-case body: runs work_group_scan_inclusive with work_group_op::add for cl_int, cl_uint, cl_long and cl_ulong. NOTE(review): the line naming this test case is not present above the parameter list — confirm against upstream.
+    int error = CL_SUCCESS; // OR-accumulated status of all four runs
+    int local_error = CL_SUCCESS; // status of the most recent run
+    local_error = work_group_scan_inclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error) // CHECK_ERROR is defined outside this view; presumably it reports a non-success status — verify
+    error |= local_error;
+    local_error = work_group_scan_inclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_scan_inclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_scan_inclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    if(error != CL_SUCCESS) // any non-zero accumulated status is collapsed to -1
+        return -1;
+    return CL_SUCCESS;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // Test-case body: runs work_group_scan_inclusive with work_group_op::min for cl_int, cl_uint, cl_long and cl_ulong. NOTE(review): the line naming this test case is not present above the parameter list — confirm against upstream.
+    int error = CL_SUCCESS; // OR-accumulated status of all four runs
+    int local_error = CL_SUCCESS; // status of the most recent run
+    local_error = work_group_scan_inclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error) // CHECK_ERROR is defined outside this view; presumably it reports a non-success status — verify
+    error |= local_error;
+    local_error = work_group_scan_inclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_scan_inclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_scan_inclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    if(error != CL_SUCCESS) // any non-zero accumulated status is collapsed to -1
+        return -1;
+    return CL_SUCCESS;
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) // Test-case body: runs work_group_scan_inclusive with work_group_op::max for cl_int, cl_uint, cl_long and cl_ulong. NOTE(review): the line naming this test case is not present above the parameter list — confirm against upstream.
+    int error = CL_SUCCESS; // OR-accumulated status of all four runs
+    int local_error = CL_SUCCESS; // status of the most recent run
+    local_error = work_group_scan_inclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error) // CHECK_ERROR is defined outside this view; presumably it reports a non-success status — verify
+    error |= local_error;
+    local_error = work_group_scan_inclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_scan_inclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    local_error = work_group_scan_inclusive<cl_ulong, work_group_op::max>(device, context, queue, n_elems);
+    CHECK_ERROR(local_error)
+    error |= local_error;
+    if(error != CL_SUCCESS) // any non-zero accumulated status is collapsed to -1
+        return -1;
+    return CL_SUCCESS;
diff --git a/test_conformance/clcpp/workitems/CMakeLists.txt b/test_conformance/clcpp/workitems/CMakeLists.txt
new file mode 100644
index 0000000..0035933
--- /dev/null
+++ b/test_conformance/clcpp/workitems/CMakeLists.txt
@@ -0,0 +1,7 @@
+    main.cpp
diff --git a/test_conformance/clcpp/workitems/main.cpp b/test_conformance/clcpp/workitems/main.cpp
new file mode 100644
index 0000000..1c322df
--- /dev/null
+++ b/test_conformance/clcpp/workitems/main.cpp
@@ -0,0 +1,25 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "../common.hpp"
+#include "test_workitems.hpp"
+int main(int argc, const char *argv[]) // Entry point of the work-items test binary: hands the registered tests to runTestHarness and returns its result
+    auto& tests = autotest::test_suite::global_test_suite().test_defs; // test definitions held by the global autotest suite
+    return runTestHarness(argc, argv, tests.size(),, false, false, 0); // NOTE(review): an argument is missing between the two commas after tests.size() — confirm against upstream
diff --git a/test_conformance/clcpp/workitems/test_workitems.hpp b/test_conformance/clcpp/workitems/test_workitems.hpp
new file mode 100644
index 0000000..099ef34
--- /dev/null
+++ b/test_conformance/clcpp/workitems/test_workitems.hpp
@@ -0,0 +1,417 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <vector>
+#include <algorithm>
+#include <random>
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+namespace test_workitems {
+struct test_options // Options passed by value to test_workitems(); NOTE(review): the struct's braces are not present in this text
+    bool uniform_work_group_size; // presumably selects uniform vs. non-uniform work-group sizes — its use is not visible here, confirm
+    size_t max_count; // upper bound of the random `count` drawn for each run (uniform_int_distribution(1, max_count))
+    size_t num_tests; // number of iterations of the outer test loop
+struct output_type // Host-side counterpart of the kernel-side `struct output_type` declared in source_common: same fields in the same order, with cl_uint/cl_ulong in place of uint/ulong
+    cl_uint  work_dim; // kernel stores get_work_dim()
+    cl_ulong global_size[3]; // per-dimension arrays: the kernel fills indices 0..2
+    cl_ulong global_id[3];
+    cl_ulong local_size[3];
+    cl_ulong enqueued_local_size[3];
+    cl_ulong local_id[3];
+    cl_ulong num_groups[3];
+    cl_ulong group_id[3];
+    cl_ulong global_offset[3];
+    cl_ulong global_linear_id; // also the index of the record the kernel writes (output[gid])
+    cl_ulong local_linear_id;
+    cl_ulong sub_group_size; // sub-group fields: the OpenCL C kernel writes them only under #ifdef cl_khr_subgroups
+    cl_ulong max_sub_group_size;
+    cl_ulong num_sub_groups;
+    cl_ulong enqueued_num_sub_groups;
+    cl_ulong sub_group_id;
+    cl_ulong sub_group_local_id;
+const std::string source_common = /* Kernel-side declaration of struct output_type, concatenated into both `source` variants. NOTE(review): the closing of the struct and of this raw string is not present in this text — confirm against upstream. */ R"(
+struct output_type
+    uint  work_dim;
+    ulong global_size[3];
+    ulong global_id[3];
+    ulong local_size[3];
+    ulong enqueued_local_size[3];
+    ulong local_id[3];
+    ulong num_groups[3];
+    ulong group_id[3];
+    ulong global_offset[3];
+    ulong global_linear_id;
+    ulong local_linear_id;
+    ulong sub_group_size;
+    ulong max_sub_group_size;
+    ulong num_sub_groups;
+    ulong enqueued_num_sub_groups;
+    ulong sub_group_id;
+    ulong sub_group_local_id;
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) // selects the OpenCL C kernel text; NOTE(review): no #else/#endif is visible between the two definitions of `source` below — confirm against upstream
+const std::string source = // OpenCL C variant: kernel "test" records every work-item query into output[get_global_linear_id()]
+    source_common +
+    R"(
+        #ifdef cl_khr_subgroups
+        #pragma OPENCL EXTENSION cl_khr_subgroups : enable
+        #endif
+        kernel void test(global struct output_type *output)
+        {
+           const ulong gid = get_global_linear_id();
+           output[gid].work_dim = get_work_dim();
+           for (uint dimindx = 0; dimindx < 3; dimindx++)
+           {
+               output[gid].global_size[dimindx] = get_global_size(dimindx);
+               output[gid].global_id[dimindx] = get_global_id(dimindx);
+               output[gid].local_size[dimindx] = get_local_size(dimindx);
+               output[gid].enqueued_local_size[dimindx] = get_enqueued_local_size(dimindx);
+               output[gid].local_id[dimindx] = get_local_id(dimindx);
+               output[gid].num_groups[dimindx] = get_num_groups(dimindx);
+               output[gid].group_id[dimindx] = get_group_id(dimindx);
+               output[gid].global_offset[dimindx] = get_global_offset(dimindx);
+           }
+           output[gid].global_linear_id = get_global_linear_id();
+           output[gid].local_linear_id = get_local_linear_id();
+        #ifdef cl_khr_subgroups
+           output[gid].sub_group_size = get_sub_group_size();
+           output[gid].max_sub_group_size = get_max_sub_group_size();
+           output[gid].num_sub_groups = get_num_sub_groups();
+           output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups();
+           output[gid].sub_group_id = get_sub_group_id();
+           output[gid].sub_group_local_id = get_sub_group_local_id();
+        #endif
+        }
+    )"; // in this variant the sub-group fields are written only when cl_khr_subgroups is defined
+const std::string source = // OpenCL C++ variant: library includes first, then source_common, then the kernel
+    R"(
+        #include <opencl_memory>
+        #include <opencl_work_item>
+        using namespace cl;
+    )" +
+    source_common +
+    R"(
+        kernel void test(global_ptr<output_type[]> output)
+        {
+           const size_t gid = get_global_linear_id();
+           output[gid].work_dim = get_work_dim();
+           for (uint dimindx = 0; dimindx < 3; dimindx++)
+           {
+               output[gid].global_size[dimindx] = get_global_size(dimindx);
+               output[gid].global_id[dimindx] = get_global_id(dimindx);
+               output[gid].local_size[dimindx] = get_local_size(dimindx);
+               output[gid].enqueued_local_size[dimindx] = get_enqueued_local_size(dimindx);
+               output[gid].local_id[dimindx] = get_local_id(dimindx);
+               output[gid].num_groups[dimindx] = get_num_groups(dimindx);
+               output[gid].group_id[dimindx] = get_group_id(dimindx);
+               output[gid].global_offset[dimindx] = get_global_offset(dimindx);
+           }
+           output[gid].global_linear_id = get_global_linear_id();
+           output[gid].local_linear_id = get_local_linear_id();
+           output[gid].sub_group_size = get_sub_group_size();
+           output[gid].max_sub_group_size = get_max_sub_group_size();
+           output[gid].num_sub_groups = get_num_sub_groups();
+           output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups();
+           output[gid].sub_group_id = get_sub_group_id();
+           output[gid].sub_group_local_id = get_sub_group_local_id();
+        }
+    )"; // in this variant the sub-group queries are called unconditionally
+#define CHECK_EQUAL(result, expected, func_name) /* When result != expected, invokes RETURN_ON_ERROR_MSG(-1, ...) with func_name and both values rendered through format_value(). */ \
+    if (result != expected) \
+    { \
+        RETURN_ON_ERROR_MSG(-1, \
+            "Function %s failed. Expected: %s, got: %s", func_name, \
+            format_value(expected).c_str(), format_value(result).c_str() \
+        ); \
+    } // NOTE(review): expands to a bare `if` with unparenthesized arguments, and on mismatch each argument is evaluated a second time for formatting — pass simple, side-effect-free expressions
+#define CHECK(expression, func_name) /* Invokes RETURN_ON_ERROR_MSG(-1, ...) when `expression` is TRUE, so callers pass the failure condition, not the success condition. */ \
+    if (expression) \
+    { \
+        RETURN_ON_ERROR_MSG(-1, \
+            "Function %s returned incorrect result", func_name \
+        ); \
+    } // NOTE(review): expands to a bare `if` statement (no do/while wrapper) — take care when using it next to an `else`
+// Launches the "test" kernel over randomly sized 1D, 2D and 3D NDRanges and
+// compares each work-item query value the kernel stored in the output buffer
+// against the value recomputed on the host.
+//
+// Parameters:
+//   device, context, queue - OpenCL objects used to build and run the kernel.
+//   options.num_tests      - number of random rounds; each round covers
+//                            dim = 1, 2 and 3.
+//   options.max_count      - upper bound of the random count from which the
+//                            per-dimension global sizes are derived.
+//   options.uniform_work_group_size
+//                          - when set, each global size is passed through
+//                            get_uniform_global_size() before the launch.
+//
+// Returns `error`, which holds the status of the last checked call once all
+// rounds have completed. Failures leave the function early through the
+// RETURN_ON_* and CHECK* macros.
+int test_workitems(cl_device_id device, cl_context context, cl_command_queue queue, test_options options)
+    int error = CL_SUCCESS;
+    cl_program program;
+    cl_kernel kernel;
+    std::string kernel_name = "test";
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// NOTE(review): the #if that opens the conditional chain continued by the #elif
+// below (and its closing #else/#endif) is not visible here - confirm.
+// Only OpenCL C++ to SPIR-V compilation
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    return error;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name, "-cl-std=CL2.0", false
+    );
+    RETURN_ON_ERROR(error)
+// Normal run
+    error = create_opencl_kernel(
+        context, &program, &kernel,
+        source, kernel_name
+    );
+    RETURN_ON_ERROR(error)
+    // Limits reported by get_max_allowed_work_group_size(); they bound the
+    // random local sizes generated below.
+    size_t max_work_group_size;
+    size_t max_local_sizes[3];
+    error = get_max_allowed_work_group_size(context, kernel, &max_work_group_size, max_local_sizes);
+    RETURN_ON_ERROR(error)
+    // check_sub_groups enables the sub-group sanity checks;
+    // check_sub_groups_limits additionally compares against clGetKernelSubGroupInfo.
+    bool check_sub_groups = true;
+    bool check_sub_groups_limits = true;
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+    check_sub_groups = false;
+    check_sub_groups_limits = false;
+    if (is_extension_available(device, "cl_khr_subgroups"))
+    {
+        Version version = get_device_cl_version(device);
+        RETURN_ON_ERROR(error)
+        check_sub_groups_limits = (version >= Version(2,1)); // clGetKernelSubGroupInfo is from 2.1
+        check_sub_groups = true;
+    }
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<size_t> count_dis(1, options.max_count);
+    for (int test = 0; test < options.num_tests; test++)
+    {
+        for (size_t dim = 1; dim <= 3; dim++)
+        {
+            // Dimensions beyond `dim` keep the neutral values set here
+            size_t global_size[3] = { 1, 1, 1 };
+            size_t global_offset[3] = { 0, 0, 0 };
+            size_t enqueued_local_size[3] = { 1, 1, 1 };
+            size_t count = count_dis(gen);
+            // Per-dimension upper bound is the dim-th root of count
+            std::uniform_int_distribution<size_t> global_size_dis(1, static_cast<size_t>(pow(count, 1.0 / dim)));
+            for (size_t d = 0; d < dim; d++)
+            {
+                std::uniform_int_distribution<size_t> enqueued_local_size_dis(1, max_local_sizes[d]);
+                global_size[d] = global_size_dis(gen);
+                global_offset[d] = global_size_dis(gen);
+                enqueued_local_size[d] = enqueued_local_size_dis(gen);
+            }
+            // Local work size must not exceed CL_KERNEL_WORK_GROUP_SIZE for this kernel
+            while (enqueued_local_size[0] * enqueued_local_size[1] * enqueued_local_size[2] > max_work_group_size)
+            {
+                // otherwise decrease it until it fits
+                for (size_t d = 0; d < dim; d++)
+                {
+                    enqueued_local_size[d] = (std::max)((size_t)1, enqueued_local_size[d] / 2);
+                }
+            }
+            if (options.uniform_work_group_size)
+            {
+                for (size_t d = 0; d < dim; d++)
+                {
+                    global_size[d] = get_uniform_global_size(global_size[d], enqueued_local_size[d]);
+                }
+            }
+            // From here on `count` is the actual number of work-items launched
+            count = global_size[0] * global_size[1] * global_size[2];
+            cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * count, NULL, &error);
+            RETURN_ON_CL_ERROR(error, "clCreateBuffer")
+            // Zero the whole buffer before the kernel writes its results
+            const char pattern = 0;
+            error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL);
+            RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer")
+            error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer);
+            RETURN_ON_CL_ERROR(error, "clSetKernelArg")
+            error = clEnqueueNDRangeKernel(queue, kernel, dim, global_offset, global_size, enqueued_local_size, 0, NULL, NULL);
+            RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel")
+            // Blocking read (CL_TRUE) of all results into host memory
+            std::vector<output_type> output(count);
+            error = clEnqueueReadBuffer(
+                queue, output_buffer, CL_TRUE,
+                0, sizeof(output_type) * count,
+                static_cast<void *>(output.data()),
+                0, NULL, NULL
+            );
+            RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer")
+            error = clReleaseMemObject(output_buffer);
+            RETURN_ON_CL_ERROR(error, "clReleaseMemObject")
+            // Reference sub-group values; they stay 0 when the limits are not checked
+            size_t sub_group_count_for_ndrange = 0;
+            size_t max_sub_group_size_for_ndrange = 0;
+            if (check_sub_groups_limits)
+            {
+                error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
+                    sizeof(size_t) * dim, enqueued_local_size,
+                    sizeof(size_t), &sub_group_count_for_ndrange, NULL);
+                RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
+                error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+                    sizeof(size_t) * dim, enqueued_local_size,
+                    sizeof(size_t), &max_sub_group_size_for_ndrange, NULL);
+                RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo")
+            }
+            // Round up so that a trailing partial work-group counts as a group;
+            // integer arithmetic avoids a round-trip through double.
+            size_t num_groups[3];
+            for (int d = 0; d < 3; d++)
+                num_groups[d] = (global_size[d] + enqueued_local_size[d] - 1) / enqueued_local_size[d];
+            // Walk every work-group, then every work-item inside it
+            size_t group_id[3];
+            for (group_id[0] = 0; group_id[0] < num_groups[0]; group_id[0]++)
+            for (group_id[1] = 0; group_id[1] < num_groups[1]; group_id[1]++)
+            for (group_id[2] = 0; group_id[2] < num_groups[2]; group_id[2]++)
+            {
+                // The last group in a dimension only gets the remaining work-items
+                size_t local_size[3];
+                for (int d = 0; d < 3; d++)
+                {
+                    if (group_id[d] == num_groups[d] - 1)
+                        local_size[d] = global_size[d] - group_id[d] * enqueued_local_size[d];
+                    else
+                        local_size[d] = enqueued_local_size[d];
+                }
+                size_t local_id[3];
+                for (local_id[0] = 0; local_id[0] < local_size[0]; local_id[0]++)
+                for (local_id[1] = 0; local_id[1] < local_size[1]; local_id[1]++)
+                for (local_id[2] = 0; local_id[2] < local_size[2]; local_id[2]++)
+                {
+                    size_t global_id_wo_offset[3];
+                    size_t global_id[3];
+                    for (int d = 0; d < 3; d++)
+                    {
+                        global_id_wo_offset[d] = group_id[d] * enqueued_local_size[d] + local_id[d];
+                        global_id[d] = global_id_wo_offset[d] + global_offset[d];
+                    }
+                    // Ignore if the current work-item is outside of global work size (i.e. the work-group is non-uniform)
+                    if (global_id_wo_offset[0] >= global_size[0] ||
+                        global_id_wo_offset[1] >= global_size[1] ||
+                        global_id_wo_offset[2] >= global_size[2]) break;
+                    // Linear ids are computed without the global offset; the
+                    // global one also indexes the host copy of the results.
+                    const size_t global_linear_id =
+                        global_id_wo_offset[2] * global_size[1] * global_size[0] +
+                        global_id_wo_offset[1] * global_size[0] +
+                        global_id_wo_offset[0];
+                    const size_t local_linear_id =
+                        local_id[2] * local_size[1] * local_size[0] +
+                        local_id[1] * local_size[0] +
+                        local_id[0];
+                    const output_type &o = output[global_linear_id];
+                    CHECK_EQUAL(o.work_dim, dim, "get_work_dim")
+                    for (int d = 0; d < 3; d++)
+                    {
+                        CHECK_EQUAL(o.global_size[d], global_size[d], "get_global_size")
+                        CHECK_EQUAL(o.global_id[d], global_id[d], "get_global_id")
+                        CHECK_EQUAL(o.local_size[d], local_size[d], "get_local_size")
+                        CHECK_EQUAL(o.enqueued_local_size[d], enqueued_local_size[d], "get_enqueued_local_size")
+                        CHECK_EQUAL(o.local_id[d], local_id[d], "get_local_id")
+                        CHECK_EQUAL(o.num_groups[d], num_groups[d], "get_num_groups")
+                        CHECK_EQUAL(o.group_id[d], group_id[d], "get_group_id")
+                        CHECK_EQUAL(o.global_offset[d], global_offset[d], "get_global_offset")
+                    }
+                    CHECK_EQUAL(o.global_linear_id, global_linear_id, "get_global_linear_id")
+                    CHECK_EQUAL(o.local_linear_id, local_linear_id, "get_local_linear_id")
+                    // A few (but not all possible) sub-groups related checks
+                    if (check_sub_groups)
+                    {
+                        if (check_sub_groups_limits)
+                        {
+                            CHECK_EQUAL(o.max_sub_group_size, max_sub_group_size_for_ndrange, "get_max_sub_group_size")
+                            CHECK_EQUAL(o.enqueued_num_sub_groups, sub_group_count_for_ndrange, "get_enqueued_num_sub_groups")
+                        }
+                        // CHECK fires when its condition is true, so each line states the failure case
+                        CHECK(o.sub_group_size == 0 || o.sub_group_size > o.max_sub_group_size, "get_sub_group_size or get_max_sub_group_size")
+                        CHECK(o.num_sub_groups == 0 || o.num_sub_groups > o.enqueued_num_sub_groups, "get_enqueued_num_sub_groups")
+                        CHECK(o.sub_group_id >= o.num_sub_groups, "get_sub_group_id or get_num_sub_groups")
+                        CHECK(o.sub_group_local_id >= o.sub_group_size, "get_sub_group_local_id or get_sub_group_size")
+                    }
+                }
+            }
+        }
+    }
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    return error;
+#undef CHECK
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    // NOTE(review): the declarator that precedes this parameter list is not
+    // visible here - confirm the test-case name against the full file.
+    // Runs test_workitems() with uniform work-group sizes requested:
+    // num_elements becomes the upper bound for the random work-item count,
+    // and 1000 random rounds are executed.
+    test_options options;
+    options.uniform_work_group_size = true;
+    options.max_count = num_elements;
+    options.num_tests = 1000;
+    return test_workitems(device, context, queue, options);
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+    // NOTE(review): the declarator that precedes this parameter list is not
+    // visible here - confirm the test-case name against the full file.
+    // Runs test_workitems() without forcing uniform work-group sizes, so the
+    // random global sizes are used as generated. num_elements becomes the
+    // upper bound for the random work-item count; 1000 random rounds are run.
+    test_options options;
+    options.uniform_work_group_size = false;
+    options.max_count = num_elements;
+    options.num_tests = 1000;
+    return test_workitems(device, context, queue, options);
+} // namespace
diff --git a/test_conformance/commonfns/main.cpp b/test_conformance/commonfns/main.cpp
index b8364d5..739e09e 100644
--- a/test_conformance/commonfns/main.cpp
+++ b/test_conformance/commonfns/main.cpp
@@ -58,6 +58,6 @@
 int main(int argc, const char *argv[])
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
diff --git a/test_conformance/commonfns/test_binary_fn.cpp b/test_conformance/commonfns/test_binary_fn.cpp
index b40bf1f..51d7365 100644
--- a/test_conformance/commonfns/test_binary_fn.cpp
+++ b/test_conformance/commonfns/test_binary_fn.cpp
@@ -86,19 +86,15 @@
     for( i = 0; i < 3; i++ )
-        streams[i] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, &err);
+        streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, &err );
         test_error( err, "clCreateBuffer failed");
     if (test_double)
         for( i = 3; i < 6; i++ )
-            streams[i] =
-                clCreateBuffer(context, CL_MEM_READ_WRITE,
-                               sizeof(cl_double) * num_elements, NULL, &err);
-            test_error(err, "clCreateBuffer failed");
+          streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, &err );
+          test_error( err, "clCreateBuffer failed");
     d = init_genrand( gRandomSeed );
diff --git a/test_conformance/commonfns/test_clamp.cpp b/test_conformance/commonfns/test_clamp.cpp
index bbb8364..5d01900 100644
--- a/test_conformance/commonfns/test_clamp.cpp
+++ b/test_conformance/commonfns/test_clamp.cpp
@@ -152,9 +152,7 @@
     // why does this go from 0 to 3?
     for( i = 0; i < 4; i++ )
-        streams[i] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, NULL);
+        streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
         if (!streams[0])
             log_error("clCreateBuffer failed\n");
@@ -164,10 +162,8 @@
     if (test_double)
     for( i = 4; i < 8; i++ )
-            streams[i] =
-                clCreateBuffer(context, CL_MEM_READ_WRITE,
-                               sizeof(cl_double) * num_elements, NULL, NULL);
-            if (!streams[0])
+        streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
+        if (!streams[0])
             log_error("clCreateBuffer failed\n");
             return -1;
diff --git a/test_conformance/commonfns/test_degrees.cpp b/test_conformance/commonfns/test_degrees.cpp
index 7360c03..d6593db 100644
--- a/test_conformance/commonfns/test_degrees.cpp
+++ b/test_conformance/commonfns/test_degrees.cpp
@@ -130,16 +130,14 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -361,16 +359,14 @@
     input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
     output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/commonfns/test_fmax.cpp b/test_conformance/commonfns/test_fmax.cpp
index 2441e69..462f9e4 100644
--- a/test_conformance/commonfns/test_fmax.cpp
+++ b/test_conformance/commonfns/test_fmax.cpp
@@ -103,22 +103,19 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[2])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/commonfns/test_fmaxf.cpp b/test_conformance/commonfns/test_fmaxf.cpp
index 1aed539..2ed1bcc 100644
--- a/test_conformance/commonfns/test_fmaxf.cpp
+++ b/test_conformance/commonfns/test_fmaxf.cpp
@@ -109,25 +109,20 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
             log_error("clCreateBuffer failed\n");
             return -1;
-        streams[1] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, NULL);
-        if (!streams[1])
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
+    if (!streams[1])
             log_error("clCreateBuffer failed\n");
             return -1;
-        streams[2] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, NULL);
-        if (!streams[2])
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
+    if (!streams[2])
             log_error("clCreateBuffer failed\n");
             return -1;
diff --git a/test_conformance/commonfns/test_fmin.cpp b/test_conformance/commonfns/test_fmin.cpp
index 19bc7b6..7efbb8f 100644
--- a/test_conformance/commonfns/test_fmin.cpp
+++ b/test_conformance/commonfns/test_fmin.cpp
@@ -108,23 +108,20 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[2])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/commonfns/test_fminf.cpp b/test_conformance/commonfns/test_fminf.cpp
index e0e455a..f04fb1e 100644
--- a/test_conformance/commonfns/test_fminf.cpp
+++ b/test_conformance/commonfns/test_fminf.cpp
@@ -104,22 +104,19 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[2])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp
index 51baac4..d773f76 100644
--- a/test_conformance/commonfns/test_mix.cpp
+++ b/test_conformance/commonfns/test_mix.cpp
@@ -66,30 +66,26 @@
     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[2])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[3])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/commonfns/test_radians.cpp b/test_conformance/commonfns/test_radians.cpp
index 0a580c1..9ff53a6 100644
--- a/test_conformance/commonfns/test_radians.cpp
+++ b/test_conformance/commonfns/test_radians.cpp
@@ -131,16 +131,14 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -363,16 +361,14 @@
     input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
     output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/commonfns/test_sign.cpp b/test_conformance/commonfns/test_sign.cpp
index 1b842e3..bf8d8c7 100644
--- a/test_conformance/commonfns/test_sign.cpp
+++ b/test_conformance/commonfns/test_sign.cpp
@@ -117,16 +117,14 @@
   input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
   output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[0])
     log_error("clCreateBuffer failed\n");
     return -1;
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[1])
     log_error("clCreateBuffer failed\n");
@@ -333,16 +331,14 @@
   input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
   output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_double) * num_elements, NULL, NULL);
+  streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
   if (!streams[0])
     log_error("clCreateBuffer failed\n");
     return -1;
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_double) * num_elements, NULL, NULL);
+  streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
   if (!streams[1])
     log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/commonfns/test_smoothstep.cpp b/test_conformance/commonfns/test_smoothstep.cpp
index c0cc1d4..19201fd 100644
--- a/test_conformance/commonfns/test_smoothstep.cpp
+++ b/test_conformance/commonfns/test_smoothstep.cpp
@@ -116,30 +116,26 @@
   input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
   input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
   output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[0])
     log_error("clCreateBuffer failed\n");
     return -1;
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[1])
     log_error("clCreateBuffer failed\n");
     return -1;
-  streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[2])
     log_error("clCreateBuffer failed\n");
     return -1;
-  streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[3])
     log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/commonfns/test_smoothstepf.cpp b/test_conformance/commonfns/test_smoothstepf.cpp
index ac09e9e..7d708de 100644
--- a/test_conformance/commonfns/test_smoothstepf.cpp
+++ b/test_conformance/commonfns/test_smoothstepf.cpp
@@ -93,30 +93,26 @@
   input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
   input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
   output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[0])
     log_error("clCreateBuffer failed\n");
     return -1;
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[1])
     log_error("clCreateBuffer failed\n");
     return -1;
-  streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[2])
     log_error("clCreateBuffer failed\n");
     return -1;
-  streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
+  streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
   if (!streams[3])
     log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp
index 0e3cfe0..3b1a5ba 100644
--- a/test_conformance/commonfns/test_step.cpp
+++ b/test_conformance/commonfns/test_step.cpp
@@ -109,22 +109,19 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[2])
         log_error("clCreateBuffer failed\n");
@@ -376,22 +373,19 @@
     input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
     input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
     output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[2])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/commonfns/test_stepf.cpp b/test_conformance/commonfns/test_stepf.cpp
index efada22..ba7d2e1 100644
--- a/test_conformance/commonfns/test_stepf.cpp
+++ b/test_conformance/commonfns/test_stepf.cpp
@@ -111,22 +111,19 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[2])
         log_error("clCreateBuffer failed\n");
@@ -380,22 +377,19 @@
     input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
     input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
     output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
     if (!streams[2])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/compiler/CMakeLists.txt b/test_conformance/compiler/CMakeLists.txt
index e703471..69ad51c 100644
--- a/test_conformance/compiler/CMakeLists.txt
+++ b/test_conformance/compiler/CMakeLists.txt
@@ -7,27 +7,9 @@
-    test_opencl_c_versions.cpp
-    test_unload_platform_compiler.cpp
-    test_feature_macro.cpp
-# Copy the required test include directories into the build directory.
-        COMMENT "Copying compiler test resources..."
-        PRE_BUILD
-        COMMAND ${CMAKE_COMMAND} -E copy_directory
-        ${CLConform_SOURCE_DIR}/test_conformance/compiler/includeTestDirectory
-        ${COMPILER_TEST_RESOURCES}/includeTestDirectory
-        COMMAND ${CMAKE_COMMAND} -E copy_directory
-        ${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory
-        ${COMPILER_TEST_RESOURCES}/secondIncludeTestDirectory)
diff --git a/test_conformance/compiler/main.cpp b/test_conformance/compiler/main.cpp
index f0a9ef3..dd97b08 100644
--- a/test_conformance/compiler/main.cpp
+++ b/test_conformance/compiler/main.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -15,97 +15,85 @@
 #include "harness/compat.h"
-#include "harness/testHarness.h"
-#include "procs.h"
 #include <stdio.h>
 #include <string.h>
+#include "procs.h"
+#include "harness/testHarness.h"
 #if !defined(_WIN32)
 #include <unistd.h>
 test_definition test_list[] = {
-    ADD_TEST(load_program_source),
-    ADD_TEST(load_multistring_source),
-    ADD_TEST(load_two_kernel_source),
-    ADD_TEST(load_null_terminated_source),
-    ADD_TEST(load_null_terminated_multi_line_source),
-    ADD_TEST(load_null_terminated_partial_multi_line_source),
-    ADD_TEST(load_discreet_length_source),
-    ADD_TEST(get_program_source),
-    ADD_TEST(get_program_build_info),
-    ADD_TEST(get_program_info),
+    ADD_TEST( load_program_source ),
+    ADD_TEST( load_multistring_source ),
+    ADD_TEST( load_two_kernel_source ),
+    ADD_TEST( load_null_terminated_source ),
+    ADD_TEST( load_null_terminated_multi_line_source ),
+    ADD_TEST( load_null_terminated_partial_multi_line_source ),
+    ADD_TEST( load_discreet_length_source ),
+    ADD_TEST( get_program_source ),
+    ADD_TEST( get_program_build_info ),
+    ADD_TEST( get_program_info ),
-    ADD_TEST(large_compile),
-    ADD_TEST(async_build),
+    ADD_TEST( large_compile ),
+    ADD_TEST( async_build ),
-    ADD_TEST(options_build_optimizations),
-    ADD_TEST(options_build_macro),
-    ADD_TEST(options_build_macro_existence),
-    ADD_TEST(options_include_directory),
-    ADD_TEST(options_denorm_cache),
+    ADD_TEST( options_build_optimizations ),
+    ADD_TEST( options_build_macro ),
+    ADD_TEST( options_build_macro_existence ),
+    ADD_TEST( options_include_directory ),
+    ADD_TEST( options_denorm_cache ),
-    ADD_TEST(preprocessor_define_udef),
-    ADD_TEST(preprocessor_include),
-    ADD_TEST(preprocessor_line_error),
-    ADD_TEST(preprocessor_pragma),
+    ADD_TEST( preprocessor_define_udef ),
+    ADD_TEST( preprocessor_include ),
+    ADD_TEST( preprocessor_line_error ),
+    ADD_TEST( preprocessor_pragma ),
-    ADD_TEST(opencl_c_versions),
-    ADD_TEST(compiler_defines_for_extensions),
-    ADD_TEST(image_macro),
+    ADD_TEST( compiler_defines_for_extensions ),
+    ADD_TEST( image_macro ),
-    ADD_TEST(simple_compile_only),
-    ADD_TEST(simple_static_compile_only),
-    ADD_TEST(simple_extern_compile_only),
-    ADD_TEST(simple_compile_with_callback),
-    ADD_TEST(simple_embedded_header_compile),
-    ADD_TEST(simple_link_only),
-    ADD_TEST(two_file_regular_variable_access),
-    ADD_TEST(two_file_regular_struct_access),
-    ADD_TEST(two_file_regular_function_access),
-    ADD_TEST(simple_link_with_callback),
-    ADD_TEST(simple_embedded_header_link),
-    ADD_TEST(execute_after_simple_compile_and_link),
-    ADD_TEST(execute_after_simple_compile_and_link_no_device_info),
-    ADD_TEST(execute_after_simple_compile_and_link_with_defines),
-    ADD_TEST(execute_after_simple_compile_and_link_with_callbacks),
-    ADD_TEST(execute_after_simple_library_with_link),
-    ADD_TEST(execute_after_two_file_link),
-    ADD_TEST(execute_after_embedded_header_link),
-    ADD_TEST(execute_after_included_header_link),
-    ADD_TEST(execute_after_serialize_reload_object),
-    ADD_TEST(execute_after_serialize_reload_library),
-    ADD_TEST(simple_library_only),
-    ADD_TEST(simple_library_with_callback),
-    ADD_TEST(simple_library_with_link),
-    ADD_TEST(two_file_link),
-    ADD_TEST(multi_file_libraries),
-    ADD_TEST(multiple_files),
-    ADD_TEST(multiple_libraries),
-    ADD_TEST(multiple_files_multiple_libraries),
-    ADD_TEST(multiple_embedded_headers),
+    ADD_TEST( simple_compile_only ),
+    ADD_TEST( simple_static_compile_only ),
+    ADD_TEST( simple_extern_compile_only ),
+    ADD_TEST( simple_compile_with_callback ),
+    ADD_TEST( simple_embedded_header_compile ),
+    ADD_TEST( simple_link_only ),
+    ADD_TEST( two_file_regular_variable_access ),
+    ADD_TEST( two_file_regular_struct_access ),
+    ADD_TEST( two_file_regular_function_access ),
+    ADD_TEST( simple_link_with_callback ),
+    ADD_TEST( simple_embedded_header_link ),
+    ADD_TEST( execute_after_simple_compile_and_link ),
+    ADD_TEST( execute_after_simple_compile_and_link_no_device_info ),
+    ADD_TEST( execute_after_simple_compile_and_link_with_defines ),
+    ADD_TEST( execute_after_simple_compile_and_link_with_callbacks ),
+    ADD_TEST( execute_after_simple_library_with_link ),
+    ADD_TEST( execute_after_two_file_link ),
+    ADD_TEST( execute_after_embedded_header_link ),
+    ADD_TEST( execute_after_included_header_link ),
+    ADD_TEST( execute_after_serialize_reload_object ),
+    ADD_TEST( execute_after_serialize_reload_library ),
+    ADD_TEST( simple_library_only ),
+    ADD_TEST( simple_library_with_callback ),
+    ADD_TEST( simple_library_with_link ),
+    ADD_TEST( two_file_link ),
+    ADD_TEST( multi_file_libraries ),
+    ADD_TEST( multiple_files ),
+    ADD_TEST( multiple_libraries ),
+    ADD_TEST( multiple_files_multiple_libraries ),
+    ADD_TEST( multiple_embedded_headers ),
-    ADD_TEST(program_binary_type),
-    ADD_TEST(compile_and_link_status_options_log),
+    ADD_TEST( program_binary_type ),
+    ADD_TEST( compile_and_link_status_options_log ),
-    ADD_TEST_VERSION(pragma_unroll, Version(2, 0)),
-    ADD_TEST_VERSION(features_macro, Version(3, 0)),
-    ADD_TEST(unload_valid),
-    // ADD_TEST(unload_invalid), // disabling temporarily, see GitHub #977
-    ADD_TEST(unload_repeated),
-    ADD_TEST(unload_compile_unload_link),
-    ADD_TEST(unload_build_unload_create_kernel),
-    ADD_TEST(unload_link_different),
-    ADD_TEST(unload_build_threaded),
-    ADD_TEST(unload_build_info),
-    ADD_TEST(unload_program_binaries),
+    ADD_TEST_VERSION( pragma_unroll, Version(2, 0) ),
-const int test_num = ARRAY_SIZE(test_list);
+const int test_num = ARRAY_SIZE( test_list );
 int main(int argc, const char *argv[])
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
diff --git a/test_conformance/compiler/procs.h b/test_conformance/compiler/procs.h
index 10ae142..1ba655a 100644
--- a/test_conformance/compiler/procs.h
+++ b/test_conformance/compiler/procs.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,249 +13,76 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "harness/conversions.h"
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
-#include "harness/mt19937.h"
 #include "harness/typeWrappers.h"
+#include "harness/conversions.h"
+#include "harness/mt19937.h"
-// This is a macro rather than a function to be able to use and act like the
-// existing test_error macro.
-// Not all compiler tests need to use this macro, only those that don't use the
-// test harness compiler helpers.
-#define check_compiler_available(DEVICE)                                       \
-    {                                                                          \
-        cl_bool compilerAvailable = CL_FALSE;                                  \
-        cl_int error = clGetDeviceInfo((DEVICE), CL_DEVICE_COMPILER_AVAILABLE, \
-                                       sizeof(compilerAvailable),              \
-                                       &compilerAvailable, NULL);              \
-        test_error(error, "Unable to query CL_DEVICE_COMPILER_AVAILABLE");     \
-        if (compilerAvailable == CL_FALSE)                                     \
-        {                                                                      \
-            log_info("Skipping test - no compiler is available.\n");           \
-            return TEST_SKIPPED_ITSELF;                                        \
-        }                                                                      \
-    }
+extern int        test_load_program_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_load_multistring_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_load_two_kernel_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_load_null_terminated_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_load_null_terminated_multi_line_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_load_null_terminated_partial_multi_line_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_load_discreet_length_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_get_program_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_get_program_build_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_get_program_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_load_program_source(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements);
-extern int test_load_multistring_source(cl_device_id deviceID,
-                                        cl_context context,
-                                        cl_command_queue queue,
-                                        int num_elements);
-extern int test_load_two_kernel_source(cl_device_id deviceID,
-                                       cl_context context,
-                                       cl_command_queue queue,
-                                       int num_elements);
-extern int test_load_null_terminated_source(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements);
-extern int test_load_null_terminated_multi_line_source(cl_device_id deviceID,
-                                                       cl_context context,
-                                                       cl_command_queue queue,
-                                                       int num_elements);
-extern int test_load_null_terminated_partial_multi_line_source(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_load_discreet_length_source(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements);
-extern int test_get_program_source(cl_device_id deviceID, cl_context context,
-                                   cl_command_queue queue, int num_elements);
-extern int test_get_program_build_info(cl_device_id deviceID,
-                                       cl_context context,
-                                       cl_command_queue queue,
-                                       int num_elements);
-extern int test_get_program_info(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements);
+extern int        test_large_compile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_async_build(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_large_compile(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements);
-extern int test_async_build(cl_device_id deviceID, cl_context context,
-                            cl_command_queue queue, int num_elements);
+extern int        test_options_build_optimizations(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_options_build_macro(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_options_build_macro_existence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_options_include_directory(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_options_denorm_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_options_build_optimizations(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements);
-extern int test_options_build_macro(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements);
-extern int test_options_build_macro_existence(cl_device_id deviceID,
-                                              cl_context context,
-                                              cl_command_queue queue,
-                                              int num_elements);
-extern int test_options_include_directory(cl_device_id deviceID,
-                                          cl_context context,
-                                          cl_command_queue queue,
-                                          int num_elements);
-extern int test_options_denorm_cache(cl_device_id deviceID, cl_context context,
-                                     cl_command_queue queue, int num_elements);
+extern int        test_preprocessor_define_udef(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_preprocessor_include(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_preprocessor_line_error(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_preprocessor_pragma(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_preprocessor_define_udef(cl_device_id deviceID,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         int num_elements);
-extern int test_preprocessor_include(cl_device_id deviceID, cl_context context,
-                                     cl_command_queue queue, int num_elements);
-extern int test_preprocessor_line_error(cl_device_id deviceID,
-                                        cl_context context,
-                                        cl_command_queue queue,
-                                        int num_elements);
-extern int test_preprocessor_pragma(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements);
+extern int      test_compiler_defines_for_extensions(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
+extern int test_image_macro(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_opencl_c_versions(cl_device_id device, cl_context context,
-                                  cl_command_queue queue, int num_elements);
-extern int test_compiler_defines_for_extensions(cl_device_id device,
-                                                cl_context context,
-                                                cl_command_queue queue,
-                                                int n_elems);
-extern int test_image_macro(cl_device_id deviceID, cl_context context,
-                            cl_command_queue queue, int num_elements);
+extern int      test_simple_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_simple_static_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_simple_extern_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_simple_compile_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_simple_embedded_header_compile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_simple_compile_only(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements);
-extern int test_simple_static_compile_only(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
-extern int test_simple_extern_compile_only(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
-extern int test_simple_compile_with_callback(cl_device_id deviceID,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements);
-extern int test_simple_embedded_header_compile(cl_device_id deviceID,
-                                               cl_context context,
-                                               cl_command_queue queue,
-                                               int num_elements);
+extern int      test_simple_link_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_two_file_regular_variable_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_two_file_regular_struct_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_two_file_regular_function_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_simple_link_only(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements);
-extern int test_two_file_regular_variable_access(cl_device_id deviceID,
-                                                 cl_context context,
-                                                 cl_command_queue queue,
-                                                 int num_elements);
-extern int test_two_file_regular_struct_access(cl_device_id deviceID,
-                                               cl_context context,
-                                               cl_command_queue queue,
-                                               int num_elements);
-extern int test_two_file_regular_function_access(cl_device_id deviceID,
-                                                 cl_context context,
-                                                 cl_command_queue queue,
-                                                 int num_elements);
+extern int      test_simple_link_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_simple_embedded_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_simple_link_with_callback(cl_device_id deviceID,
-                                          cl_context context,
-                                          cl_command_queue queue,
-                                          int num_elements);
-extern int test_simple_embedded_header_link(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements);
+extern int      test_execute_after_simple_compile_and_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_simple_compile_and_link_no_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_simple_compile_and_link_with_defines(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_simple_compile_and_link_with_callbacks(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_simple_library_with_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_two_file_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_embedded_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_included_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_serialize_reload_object(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_execute_after_serialize_reload_library(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_execute_after_simple_compile_and_link(cl_device_id deviceID,
-                                                      cl_context context,
-                                                      cl_command_queue queue,
-                                                      int num_elements);
-extern int test_execute_after_simple_compile_and_link_no_device_info(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_execute_after_simple_compile_and_link_with_defines(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_execute_after_simple_compile_and_link_with_callbacks(
-    cl_device_id deviceID, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_execute_after_simple_library_with_link(cl_device_id deviceID,
-                                                       cl_context context,
-                                                       cl_command_queue queue,
-                                                       int num_elements);
-extern int test_execute_after_two_file_link(cl_device_id deviceID,
-                                            cl_context context,
-                                            cl_command_queue queue,
-                                            int num_elements);
-extern int test_execute_after_embedded_header_link(cl_device_id deviceID,
-                                                   cl_context context,
-                                                   cl_command_queue queue,
-                                                   int num_elements);
-extern int test_execute_after_included_header_link(cl_device_id deviceID,
-                                                   cl_context context,
-                                                   cl_command_queue queue,
-                                                   int num_elements);
-extern int test_execute_after_serialize_reload_object(cl_device_id deviceID,
-                                                      cl_context context,
-                                                      cl_command_queue queue,
-                                                      int num_elements);
-extern int test_execute_after_serialize_reload_library(cl_device_id deviceID,
-                                                       cl_context context,
-                                                       cl_command_queue queue,
-                                                       int num_elements);
+extern int      test_simple_library_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_simple_library_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_simple_library_with_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_two_file_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_multi_file_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_multiple_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_multiple_files(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_multiple_files_multiple_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_multiple_embedded_headers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_simple_library_only(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements);
-extern int test_simple_library_with_callback(cl_device_id deviceID,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements);
-extern int test_simple_library_with_link(cl_device_id deviceID,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         int num_elements);
-extern int test_two_file_link(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements);
-extern int test_multi_file_libraries(cl_device_id deviceID, cl_context context,
-                                     cl_command_queue queue, int num_elements);
-extern int test_multiple_libraries(cl_device_id deviceID, cl_context context,
-                                   cl_command_queue queue, int num_elements);
-extern int test_multiple_files(cl_device_id deviceID, cl_context context,
-                               cl_command_queue queue, int num_elements);
-extern int test_multiple_files_multiple_libraries(cl_device_id deviceID,
-                                                  cl_context context,
-                                                  cl_command_queue queue,
-                                                  int num_elements);
-extern int test_multiple_embedded_headers(cl_device_id deviceID,
-                                          cl_context context,
-                                          cl_command_queue queue,
-                                          int num_elements);
+extern int      test_program_binary_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int      test_compile_and_link_status_options_log(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_program_binary_type(cl_device_id deviceID, cl_context context,
-                                    cl_command_queue queue, int num_elements);
-extern int test_compile_and_link_status_options_log(cl_device_id deviceID,
-                                                    cl_context context,
-                                                    cl_command_queue queue,
-                                                    int num_elements);
-extern int test_pragma_unroll(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements);
-extern int test_features_macro(cl_device_id deviceID, cl_context context,
-                               cl_command_queue queue, int num_elements);
-extern int test_unload_valid(cl_device_id deviceID, cl_context context,
-                             cl_command_queue queue, int num_elements);
-extern int test_unload_invalid(cl_device_id deviceID, cl_context context,
-                               cl_command_queue queue, int num_elements);
-extern int test_unload_repeated(cl_device_id deviceID, cl_context context,
-                                cl_command_queue queue, int num_elements);
-extern int test_unload_compile_unload_link(cl_device_id deviceID,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
-extern int test_unload_build_unload_create_kernel(cl_device_id deviceID,
-                                                  cl_context context,
-                                                  cl_command_queue queue,
-                                                  int num_elements);
-extern int test_unload_link_different(cl_device_id deviceID, cl_context context,
-                                      cl_command_queue queue, int num_elements);
-extern int test_unload_build_threaded(cl_device_id deviceID, cl_context context,
-                                      cl_command_queue queue, int num_elements);
-extern int test_unload_build_info(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements);
-extern int test_unload_program_binaries(cl_device_id deviceID,
-                                        cl_context context,
-                                        cl_command_queue queue,
-                                        int num_elements);
+extern int      test_pragma_unroll(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
diff --git a/test_conformance/compiler/test_async_build.cpp b/test_conformance/compiler/test_async_build.cpp
index d153362..3002422 100644
--- a/test_conformance/compiler/test_async_build.cpp
+++ b/test_conformance/compiler/test_async_build.cpp
@@ -1,6 +1,6 @@
-// Copyright (c) 2017-2020 The Khronos Group Inc.
+// Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -19,142 +19,75 @@
 #include <unistd.h>
-#include <atomic>
-#include <string>
-namespace {
 const char *sample_async_kernel[] = {
-    "__kernel void sample_test(__global float *src, __global int *dst)\n"
-    "{\n"
-    "    size_t tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = (int)src[tid];\n"
-    "\n"
-    "}\n"
+"__kernel void sample_test(__global float *src, __global int *dst)\n"
+"    int  tid = get_global_id(0);\n"
+"    dst[tid] = (int)src[tid];\n"
+"}\n" };
-const char *sample_async_kernel_error[] = {
-    "__kernel void sample_test(__global float *src, __global int *dst)\n"
-    "{\n"
-    "    size_t tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = badcodehere;\n"
-    "\n"
-    "}\n"
+volatile int       buildNotificationSent;
-// Data passed to a program completion callback
-struct TestData
+void CL_CALLBACK test_notify_build_complete( cl_program program, void *userData )
-    cl_device_id device;
-    cl_build_status expectedStatus;
-std::atomic<int> callbackResult;
-void CL_CALLBACK test_notify_build_complete(cl_program program, void *userData)
-    TestData *data = reinterpret_cast<TestData *>(userData);
-    // Check user data is valid
-    if (data == nullptr)
+    if( userData == NULL || strcmp( (char *)userData, "userData" ) != 0 )
-        log_error("ERROR: User data passed to callback was not valid!\n");
-        callbackResult = -1;
-        return;
-    }
-    // Get program build status
-    cl_build_status status;
-    cl_int err =
-        clGetProgramBuildInfo(program, data->device, CL_PROGRAM_BUILD_STATUS,
-                              sizeof(cl_build_status), &status, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_info("ERROR: failed to get build status from callback\n");
-        callbackResult = -1;
-        return;
-    }
-    log_info("Program completion callback received build status %d\n", status);
-    // Check program build status matches expectation
-    if (status != data->expectedStatus)
-    {
-        log_info("ERROR: build status %d != expected status %d\n", status,
-                 data->expectedStatus);
-        callbackResult = -1;
+        log_error( "ERROR: User data passed in to build notify function was not correct!\n" );
+        buildNotificationSent = -1;
-    {
-        callbackResult = 1;
-    }
+        buildNotificationSent = 1;
+    log_info( "\n   <-- program successfully built\n" );
-int test_async_build(cl_device_id deviceID, cl_context context,
-                     cl_command_queue queue, int num_elements)
+int test_async_build(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-    cl_int error;
+    int error;
+    cl_program program;
+    cl_build_status status;
-    struct TestDef
+    buildNotificationSent = 0;
+    /* First, test by doing the slow method of the individual calls */
+    error = create_single_kernel_helper_create_program(context, &program, 1, sample_async_kernel);
+    test_error(error, "Unable to create program from source");
+    /* Compile the program */
+    error = clBuildProgram( program, 1, &deviceID, NULL, test_notify_build_complete, (void *)"userData" );
+    test_error( error, "Unable to build program source" );
+    /* Wait for build to complete (just keep polling, since we're just a test */
+    if( ( error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ) ) != CL_SUCCESS )
-        const char **source;
-        cl_build_status expectedStatus;
-    };
-    TestDef testDefs[] = { { sample_async_kernel, CL_BUILD_SUCCESS },
-                           { sample_async_kernel_error, CL_BUILD_ERROR } };
-    for (TestDef &testDef : testDefs)
-    {
-        log_info("\nTesting program that should produce status %d\n",
-                 testDef.expectedStatus);
-        // Create the program
-        clProgramWrapper program;
-        error = create_single_kernel_helper_create_program(context, &program, 1,
-                                                           testDef.source);
-        test_error(error, "Unable to create program from source");
-        // Start an asynchronous build, registering the completion callback
-        TestData testData = { deviceID, testDef.expectedStatus };
-        callbackResult = 0;
-        error = clBuildProgram(program, 1, &deviceID, NULL,
-                               test_notify_build_complete, (void *)&testData);
-        // Allow implementations to return synchronous build failures.
-        // They still need to call the callback.
-        if (!(error == CL_BUILD_PROGRAM_FAILURE
-              && testDef.expectedStatus == CL_BUILD_ERROR))
-            test_error(error, "Unable to start build");
-        // Wait for callback to fire
-        int timeout = 20;
-        while (callbackResult == 0)
-        {
-            if (timeout < 0)
-            {
-                log_error("Timeout while waiting for callback to fire.\n\n");
-                return -1;
-            }
-            log_info(" -- still waiting for callback...\n");
-            sleep(1);
-            timeout--;
-        }
-        // Check the callback result
-        if (callbackResult == 1)
-        {
-            log_error("Test passed.\n\n");
-        }
-        else
-        {
-            log_error("Async build callback indicated test failure.\n\n");
-            return -1;
-        }
+        print_error( error, "Unable to get program build status" );
+        return -1;
+    while( (int)status == CL_BUILD_IN_PROGRESS )
+    {
+        log_info( "\n  -- still waiting for build... (status is %d)", status );
+        sleep( 1 );
+        error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL );
+        test_error( error, "Unable to get program build status" );
+    }
+    if( status != CL_BUILD_SUCCESS )
+    {
+        log_error( "ERROR: build failed! (status: %d)\n", (int)status );
+        return -1;
+    }
+    if( buildNotificationSent == 0 )
+    {
+        log_error( "ERROR: Async build completed, but build notification was not sent!\n" );
+        return -1;
+    }
+    error = clReleaseProgram( program );
+    test_error( error, "Unable to release program object" );
     return 0;
diff --git a/test_conformance/compiler/test_build_helpers.cpp b/test_conformance/compiler/test_build_helpers.cpp
index c5ebb80..6cfdcf2 100644
--- a/test_conformance/compiler/test_build_helpers.cpp
+++ b/test_conformance/compiler/test_build_helpers.cpp
@@ -428,8 +428,7 @@
     int error;
     char buffer[10240];
     size_t length;
-    size_t line_length = strlen(sample_kernel_code_single_line[0]);
-    bool online_compilation = (gCompilationMode == kOnline);
     error = create_single_kernel_helper_create_program(context, &program, 1, sample_kernel_code_single_line);
     if( program == NULL )
@@ -441,7 +440,7 @@
     /* Try getting the length */
     error = clGetProgramInfo( program, CL_PROGRAM_SOURCE, 0, NULL, &length );
     test_error( error, "Unable to get program source length" );
-    if (length != line_length + 1 && online_compilation)
+    if (length != strlen(sample_kernel_code_single_line[0]) + 1 && gCompilationMode == kOnline)
         log_error( "ERROR: Length returned for program source is incorrect!\n" );
         return -1;
@@ -450,7 +449,7 @@
     /* Try normal source */
     error = clGetProgramInfo( program, CL_PROGRAM_SOURCE, sizeof( buffer ), buffer, NULL );
     test_error( error, "Unable to get program source" );
-    if (strlen(buffer) != line_length && online_compilation)
+    if (strlen(buffer) != strlen(sample_kernel_code_single_line[0]) && gCompilationMode == kOnline)
         log_error( "ERROR: Length of program source is incorrect!\n" );
         return -1;
@@ -459,12 +458,12 @@
     /* Try both at once */
     error = clGetProgramInfo( program, CL_PROGRAM_SOURCE, sizeof( buffer ), buffer, &length );
     test_error( error, "Unable to get program source" );
-    if (strlen(buffer) != line_length && online_compilation)
+    if (strlen(buffer) != strlen(sample_kernel_code_single_line[0]) && gCompilationMode == kOnline)
         log_error( "ERROR: Length of program source is incorrect!\n" );
         return -1;
-    if (length != line_length + 1 && online_compilation)
+    if (length != strlen(sample_kernel_code_single_line[0]) + 1 && gCompilationMode == kOnline)
         log_error( "ERROR: Returned length of program source is incorrect!\n" );
         return -1;
diff --git a/test_conformance/compiler/test_build_options.cpp b/test_conformance/compiler/test_build_options.cpp
index c25fd10..71f84c3 100644
--- a/test_conformance/compiler/test_build_options.cpp
+++ b/test_conformance/compiler/test_build_options.cpp
@@ -43,12 +43,11 @@
 "}\n" };
 const char *options_test_kernel[] = {
-    "__kernel void sample_test(__global float *src, __global int *dst)\n"
-    "{\n"
-    "    size_t tid = get_global_id(0);\n"
-    "    dst[tid] = (int)src[tid];\n"
-    "}\n"
+"__kernel void sample_test(__global float *src, __global int *dst)\n"
+"    size_t tid = get_global_id(0);\n"
+"    dst[tid] = src[tid];\n"
+"}\n" };
 const char *optimization_options[] = {
@@ -61,6 +60,10 @@
+#if defined( __APPLE__ )
+    "-cl-opt-enable",
+    "-cl-auto-vectorize-enable"
 cl_int get_result_from_program( cl_context context, cl_command_queue queue, cl_program program, cl_int *outValue )
@@ -70,8 +73,7 @@
     test_error( error, "Unable to create kernel from program" );
     clMemWrapper outStream;
-    outStream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL,
-                               &error);
+    outStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int), NULL, &error );
     test_error( error, "Unable to create test buffer" );
     error = clSetKernelArg( kernel, 0, sizeof( outStream ), &outStream );
@@ -310,8 +312,7 @@
     clKernelWrapper kernel = clCreateKernel( program, "sample_test", &error );
     test_error( error, "Unable to create kernel from program" );
-    clMemWrapper outStream = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                            sizeof(cl_float), NULL, &error);
+    clMemWrapper outStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float), NULL, &error );
     test_error( error, "Unable to create test buffer" );
     error = clSetKernelArg( kernel, 0, sizeof( cl_float ), &inA );
diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
index 483adac..3d50d1f 100644
--- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
+++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
@@ -21,6 +21,8 @@
 const char *known_extensions[] = {
@@ -42,16 +44,8 @@
-    "cl_khr_subgroup_extended_types",
-    "cl_khr_subgroup_non_uniform_vote",
-    "cl_khr_subgroup_ballot",
-    "cl_khr_subgroup_non_uniform_arithmetic",
-    "cl_khr_subgroup_shuffle",
-    "cl_khr_subgroup_shuffle_relative",
-    "cl_khr_subgroup_clustered_reduce",
-    // API-only extensions after this point.  If you add above here, modify
-    // first_API_extension below.
+    //API-only extensions after this point.  If you add above here, modify first_API_extension below.
@@ -70,11 +64,10 @@
-    "cl_khr_device_uuid",
 size_t num_known_extensions = sizeof(known_extensions)/sizeof(char*);
-size_t first_API_extension = 27;
+size_t first_API_extension = 20;
 const char *known_embedded_extensions[] = {
@@ -335,15 +328,14 @@
     strcat(kernel_code, kernel_strings[4]);
     // Now we need to execute the kernel
-    clMemWrapper defines;
+    cl_mem defines;
     cl_int *data;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
+    cl_program program;
+    cl_kernel kernel;
     Version version = get_device_cl_version(device);
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        (const char **)&kernel_code, "test");
+    error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test", version < Version(2,0) ? "" : "-cl-std=CL2.0");
     test_error(error, "create_single_kernel_helper failed");
     data = (cl_int*)malloc(sizeof(cl_int)*(num_not_supported_extensions+num_of_supported_extensions));
@@ -432,6 +424,10 @@
+    if( defines ) {
+        error = clReleaseMemObject( defines );
+        test_error( error, "Unable to release memory object" );
+    }
     if (total_errors)
         return -1;
diff --git a/test_conformance/compiler/test_feature_macro.cpp b/test_conformance/compiler/test_feature_macro.cpp
deleted file mode 100644
index ac355dd..0000000
--- a/test_conformance/compiler/test_feature_macro.cpp
+++ /dev/null
@@ -1,755 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "testBase.h"
-#include <vector>
-#include <algorithm>
-#include "errorHelpers.h"
-const char* macro_supported_source = R"(kernel void enabled(global int * buf) {
-        int n = get_global_id(0);
-        buf[n] = 0;
-        #ifndef %s
-            #error Feature macro was not defined
-        #endif
-const char* macro_not_supported_source =
-    R"(kernel void not_enabled(global int * buf) {
-        int n = get_global_id(0);
-        buf[n] = 0;
-        #ifdef %s
-            #error Feature macro was defined
-        #endif
-template <typename T>
-cl_int check_api_feature_info_capabilities(cl_device_id deviceID,
-                                           cl_context context, cl_bool& status,
-                                           cl_device_info check_property,
-                                           cl_bitfield check_cap)
-    cl_int error = CL_SUCCESS;
-    T response;
-    error = clGetDeviceInfo(deviceID, check_property, sizeof(response),
-                            &response, NULL);
-    test_error(error, "clGetDeviceInfo failed.\n");
-    if ((response & check_cap) == check_cap)
-    {
-        status = CL_TRUE;
-    }
-    else
-    {
-        status = CL_FALSE;
-    }
-    return error;
-cl_int check_api_feature_info_support(cl_device_id deviceID, cl_context context,
-                                      cl_bool& status,
-                                      cl_device_info check_property)
-    cl_int error = CL_SUCCESS;
-    cl_bool response;
-    error = clGetDeviceInfo(deviceID, check_property, sizeof(response),
-                            &response, NULL);
-    test_error(error, "clGetDeviceInfo failed.\n");
-    status = response;
-    return error;
-template <typename T>
-cl_int check_api_feature_info_number(cl_device_id deviceID, cl_context context,
-                                     cl_bool& status,
-                                     cl_device_info check_property)
-    cl_int error = CL_SUCCESS;
-    T response;
-    error = clGetDeviceInfo(deviceID, check_property, sizeof(response),
-                            &response, NULL);
-    test_error(error, "clGetDeviceInfo failed.\n");
-    if (response > 0)
-    {
-        status = CL_TRUE;
-    }
-    else
-    {
-        status = CL_FALSE;
-    }
-    return error;
-cl_int check_api_feature_info_supported_image_formats(cl_device_id deviceID,
-                                                      cl_context context,
-                                                      cl_bool& status)
-    cl_int error = CL_SUCCESS;
-    cl_uint response = 0;
-    cl_uint image_format_count;
-    error = clGetSupportedImageFormats(context, CL_MEM_WRITE_ONLY,
-                                       CL_MEM_OBJECT_IMAGE3D, 0, NULL,
-                                       &image_format_count);
-    test_error(error, "clGetSupportedImageFormats failed");
-    response += image_format_count;
-    error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
-                                       CL_MEM_OBJECT_IMAGE3D, 0, NULL,
-                                       &image_format_count);
-    test_error(error, "clGetSupportedImageFormats failed");
-    response += image_format_count;
-    error = clGetSupportedImageFormats(context, CL_MEM_KERNEL_READ_AND_WRITE,
-                                       CL_MEM_OBJECT_IMAGE3D, 0, NULL,
-                                       &image_format_count);
-    test_error(error, "clGetSupportedImageFormats failed");
-    response += image_format_count;
-    if (response > 0)
-    {
-        status = CL_TRUE;
-    }
-    else
-    {
-        status = CL_FALSE;
-    }
-    return error;
-cl_int check_compiler_feature_info(cl_device_id deviceID, cl_context context,
-                                   std::string feature_macro, cl_bool& status)
-    cl_int error = CL_SUCCESS;
-    clProgramWrapper program_supported;
-    clProgramWrapper program_not_supported;
-    char kernel_supported_src[1024];
-    char kernel_not_supported_src[1024];
-    sprintf(kernel_supported_src, macro_supported_source,
-            feature_macro.c_str());
-    const char* ptr_supported = kernel_supported_src;
-    const char* build_options = "-cl-std=CL3.0";
-    error = create_single_kernel_helper_create_program(
-        context, &program_supported, 1, &ptr_supported, build_options);
-    test_error(error, "create_single_kernel_helper_create_program failed.\n");
-    sprintf(kernel_not_supported_src, macro_not_supported_source,
-            feature_macro.c_str());
-    const char* ptr_not_supported = kernel_not_supported_src;
-    error = create_single_kernel_helper_create_program(
-        context, &program_not_supported, 1, &ptr_not_supported,
-        "-cl-std=CL3.0");
-    test_error(error, "create_single_kernel_helper_create_program failed.\n");
-    cl_int status_supported = CL_SUCCESS;
-    cl_int status_not_supported = CL_SUCCESS;
-    status_supported = clBuildProgram(program_supported, 1, &deviceID,
-                                      build_options, NULL, NULL);
-    status_not_supported = clBuildProgram(program_not_supported, 1, &deviceID,
-                                          build_options, NULL, NULL);
-    if (status_supported != status_not_supported)
-    {
-        if (status_not_supported == CL_SUCCESS)
-        {
-            // kernel which verifies not supporting return passed
-            status = CL_FALSE;
-        }
-        else
-        {
-            // kernel which verifies supporting return passed
-            status = CL_TRUE;
-        }
-    }
-    else
-    {
-        log_error("Error: The macro feature is defined and undefined "
-                  "in the same time\n");
-        error = OutputBuildLogs(program_supported, 1, &deviceID);
-        test_error(error, "OutputBuildLogs failed.\n");
-        error = OutputBuildLogs(program_not_supported, 1, &deviceID);
-        test_error(error, "OutputBuildLogs failed.\n");
-        return TEST_FAIL;
-    }
-    return error;
-int feature_macro_verify_results(std::string test_macro_name,
-                                 cl_bool api_status, cl_bool compiler_status,
-                                 cl_bool& supported)
-    cl_int error = TEST_PASS;
-    log_info("Feature status: API - %s, compiler - %s\n",
-             api_status == CL_TRUE ? "supported" : "not supported",
-             compiler_status == CL_TRUE ? "supported" : "not supported");
-    if (api_status != compiler_status)
-    {
-        log_info("%s - failed\n", test_macro_name.c_str());
-        supported = CL_FALSE;
-        return TEST_FAIL;
-    }
-    else
-    {
-        log_info("%s - passed\n", test_macro_name.c_str());
-    }
-    supported = api_status;
-    return error;
-int test_feature_macro_atomic_order_acq_rel(cl_device_id deviceID,
-                                            cl_context context,
-                                            std::string test_macro_name,
-                                            cl_bool& supported)
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_capabilities<cl_device_atomic_capabilities>(
-        deviceID, context, api_status, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-int test_feature_macro_atomic_order_seq_cst(cl_device_id deviceID,
-                                            cl_context context,
-                                            std::string test_macro_name,
-                                            cl_bool& supported)
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_capabilities<cl_device_atomic_capabilities>(
-        deviceID, context, api_status, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-int test_feature_macro_atomic_scope_device(cl_device_id deviceID,
-                                           cl_context context,
-                                           std::string test_macro_name,
-                                           cl_bool& supported)
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_capabilities<cl_device_atomic_capabilities>(
-        deviceID, context, api_status, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-int test_feature_macro_atomic_scope_all_devices(cl_device_id deviceID,
-                                                cl_context context,
-                                                std::string test_macro_name,
-                                                cl_bool& supported)
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_capabilities<cl_device_atomic_capabilities>(
-        deviceID, context, api_status, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES,
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-int test_feature_macro_3d_image_writes(cl_device_id deviceID,
-                                       cl_context context,
-                                       std::string test_macro_name,
-                                       cl_bool& supported)
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_supported_image_formats(deviceID, context,
-                                                           api_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-int test_feature_macro_device_enqueue(cl_device_id deviceID, cl_context context,
-                                      std::string test_macro_name,
-                                      cl_bool& supported)
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_capabilities<
-        cl_device_device_enqueue_capabilities>(
-        deviceID, context, api_status, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES,
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-int test_feature_macro_generic_address_space(cl_device_id deviceID,
-                                             cl_context context,
-                                             std::string test_macro_name,
-                                             cl_bool& supported)
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_support(
-        deviceID, context, api_status, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-int test_feature_macro_pipes(cl_device_id deviceID, cl_context context,
-                             std::string test_macro_name, cl_bool& supported)
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_support(deviceID, context, api_status,
-                                           CL_DEVICE_PIPE_SUPPORT);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-int test_feature_macro_program_scope_global_variables(
-    cl_device_id deviceID, cl_context context, std::string test_macro_name,
-    cl_bool& supported)
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_number<size_t>(
-        deviceID, context, api_status, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-int test_feature_macro_read_write_images(cl_device_id deviceID,
-                                         cl_context context,
-                                         std::string test_macro_name,
-                                         cl_bool& supported)
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_number<cl_uint>(
-        deviceID, context, api_status, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-int test_feature_macro_subgroups(cl_device_id deviceID, cl_context context,
-                                 std::string test_macro_name,
-                                 cl_bool& supported)
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_number<cl_uint>(
-        deviceID, context, api_status, CL_DEVICE_MAX_NUM_SUB_GROUPS);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-int test_feature_macro_work_group_collective_functions(
-    cl_device_id deviceID, cl_context context, std::string test_macro_name,
-    cl_bool& supported)
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_support(
-        deviceID, context, api_status,
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-int test_feature_macro_images(cl_device_id deviceID, cl_context context,
-                              std::string test_macro_name, cl_bool& supported)
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_support(deviceID, context, api_status,
-                                           CL_DEVICE_IMAGE_SUPPORT);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-int test_feature_macro_fp64(cl_device_id deviceID, cl_context context,
-                            std::string test_macro_name, cl_bool& supported)
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    error = check_api_feature_info_capabilities<cl_device_fp_config>(
-        deviceID, context, api_status, CL_DEVICE_DOUBLE_FP_CONFIG,
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-int test_feature_macro_int64(cl_device_id deviceID, cl_context context,
-                             std::string test_macro_name, cl_bool& supported)
-    cl_int error = TEST_FAIL;
-    cl_bool api_status;
-    cl_bool compiler_status;
-    cl_int full_profile = 0;
-    log_info("\n%s ...\n", test_macro_name.c_str());
-    size_t ret_len;
-    char profile[32] = { 0 };
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_PROFILE, sizeof(profile),
-                            profile, &ret_len);
-    test_error(error, "clGetDeviceInfo(CL_DEVICE_PROFILE) failed");
-    if (ret_len < sizeof(profile) && strcmp(profile, "FULL_PROFILE") == 0)
-    {
-        full_profile = 1;
-    }
-    else if (ret_len < sizeof(profile)
-             && strcmp(profile, "EMBEDDED_PROFILE") == 0)
-    {
-        full_profile = 0;
-    }
-    else
-    {
-        log_error("Unknown device profile: %s\n", profile);
-        return TEST_FAIL;
-    }
-    if (full_profile)
-    {
-        api_status = CL_TRUE;
-    }
-    else
-    {
-        if (is_extension_available(deviceID, "cles_khr_int64"))
-        {
-            api_status = CL_TRUE;
-        }
-        else
-        {
-            cl_bool double_supported = CL_FALSE;
-            error = check_api_feature_info_capabilities<cl_device_fp_config>(
-                deviceID, context, double_supported, CL_DEVICE_DOUBLE_FP_CONFIG,
-                    | CL_FP_DENORM);
-            test_error(error, "checking CL_DEVICE_DOUBLE_FP_CONFIG failed");
-            if (double_supported == CL_FALSE)
-            {
-                api_status = CL_FALSE;
-            }
-            else
-            {
-                log_error("FP double type is supported and cles_khr_int64 "
-                          "extension not supported\n");
-                return TEST_FAIL;
-            }
-        }
-    }
-    error = check_compiler_feature_info(deviceID, context, test_macro_name,
-                                        compiler_status);
-    if (error != CL_SUCCESS)
-    {
-        return error;
-    }
-    return feature_macro_verify_results(test_macro_name, api_status,
-                                        compiler_status, supported);
-int test_consistency_c_features_list(cl_device_id deviceID,
-                                     std::vector<std::string> vec_to_cmp)
-    log_info("\nComparison list of features: CL_DEVICE_OPENCL_C_FEATURES vs "
-             "API/compiler queries.\n");
-    cl_int error;
-    size_t config_size;
-    std::vector<cl_name_version> vec_device_feature;
-    std::vector<std::string> vec_device_feature_names;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_OPENCL_C_FEATURES, 0, NULL,
-                            &config_size);
-    test_error(
-        error,
-        "clGetDeviceInfo asking for CL_DEVICE_OPENCL_C_FEATURES failed.\n");
-    if (config_size == 0)
-    {
-        log_info("Empty list of CL_DEVICE_OPENCL_C_FEATURES returned by "
-                 "clGetDeviceInfo on this device.\n");
-    }
-    else
-    {
-        int vec_elements = config_size / sizeof(cl_name_version);
-        vec_device_feature.resize(vec_elements);
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_OPENCL_C_FEATURES,
-                                config_size,, 0);
-        test_error(
-            error,
-            "clGetDeviceInfo asking for CL_DEVICE_OPENCL_C_FEATURES failed.\n");
-    }
-    for (auto each_f : vec_device_feature)
-    {
-        vec_device_feature_names.push_back(;
-    }
-    sort(vec_to_cmp.begin(), vec_to_cmp.end());
-    sort(vec_device_feature_names.begin(), vec_device_feature_names.end());
-    if (vec_device_feature_names == vec_to_cmp)
-    {
-        log_info("Comparison list of features - passed\n");
-    }
-    else
-    {
-        log_info("Comparison list of features - failed\n");
-        error = TEST_FAIL;
-    }
-    log_info(
-        "Supported features based on CL_DEVICE_OPENCL_C_FEATURES API query:\n");
-    for (auto each_f : vec_device_feature_names)
-    {
-        log_info("%s\n", each_f.c_str());
-    }
-    log_info("\nSupported features based on queries to API/compiler :\n");
-    for (auto each_f : vec_to_cmp)
-    {
-        log_info("%s\n", each_f.c_str());
-    }
-    return error;
-#define NEW_FEATURE_MACRO_TEST(feat)                                           \
-    test_macro_name = "__opencl_c_" #feat;                                     \
-    error |= test_feature_macro_##feat(deviceID, context, test_macro_name,     \
-                                       supported);                             \
-    if (supported) supported_features_vec.push_back(test_macro_name);
-int test_features_macro(cl_device_id deviceID, cl_context context,
-                        cl_command_queue queue, int num_elements)
-    // Note: Not checking that the feature array is empty for the compiler not
-    // available case because the specification says "For devices that do not
-    // support compilation from OpenCL C source, this query may return an empty
-    // array."  It "may" return an empty array implies that an implementation
-    // also "may not".
-    check_compiler_available(deviceID);
-    int error = TEST_PASS;
-    cl_bool supported = CL_FALSE;
-    std::string test_macro_name = "";
-    std::vector<std::string> supported_features_vec;
-    NEW_FEATURE_MACRO_TEST(program_scope_global_variables);
-    NEW_FEATURE_MACRO_TEST(3d_image_writes);
-    NEW_FEATURE_MACRO_TEST(atomic_order_acq_rel);
-    NEW_FEATURE_MACRO_TEST(atomic_order_seq_cst);
-    NEW_FEATURE_MACRO_TEST(atomic_scope_device);
-    NEW_FEATURE_MACRO_TEST(atomic_scope_all_devices);
-    NEW_FEATURE_MACRO_TEST(device_enqueue);
-    NEW_FEATURE_MACRO_TEST(generic_address_space);
-    NEW_FEATURE_MACRO_TEST(read_write_images);
-    NEW_FEATURE_MACRO_TEST(subgroups);
-    NEW_FEATURE_MACRO_TEST(work_group_collective_functions);
-    error |= test_consistency_c_features_list(deviceID, supported_features_vec);
-    return error;
diff --git a/test_conformance/compiler/test_opencl_c_versions.cpp b/test_conformance/compiler/test_opencl_c_versions.cpp
deleted file mode 100644
index d750366..0000000
--- a/test_conformance/compiler/test_opencl_c_versions.cpp
+++ /dev/null
@@ -1,308 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "testBase.h"
-#include "harness/featureHelpers.h"
-#include <vector>
-static const char* test_kernel = R"CLC(
-__kernel void test(__global int* dst) {
-    dst[0] = 0;
-// This sub-test checks that CL_DEVICE_OPENCL_C_VERSION meets any API
-// requirements and that programs can be built for the reported OpenCL C version
-// and all previous versions.
-static int test_CL_DEVICE_OPENCL_C_VERSION(cl_device_id device,
-                                           cl_context context)
-    const Version latest_version = Version(3, 0);
-    const Version api_version = get_device_cl_version(device);
-    const Version clc_version = get_device_cl_c_version(device);
-    if (api_version > latest_version)
-    {
-        log_info("CL_DEVICE_VERSION is %s, which is bigger than %s.\n"
-                 "Need to update the opencl_c_versions test!\n",
-                 api_version.to_string().c_str(),
-                 latest_version.to_string().c_str());
-    }
-    if (clc_version > latest_version)
-    {
-        log_info("CL_DEVICE_OPENCL_C_VERSION is %s, which is bigger than %s.\n"
-                 "Need to update the opencl_c_versions test!\n",
-                 clc_version.to_string().c_str(),
-                 latest_version.to_string().c_str());
-    }
-    // For OpenCL 3.0, the minimum required OpenCL C version is OpenCL C 1.2.
-    // For OpenCL 2.x, the minimum required OpenCL C version is OpenCL C 2.0.
-    // For other OpenCL versions, the minimum required OpenCL C version is
-    // the same as the API version.
-    const Version min_clc_version = api_version == Version(3, 0)
-        ? Version(1, 2)
-        : api_version >= Version(2, 0) ? Version(2, 0) : api_version;
-    if (clc_version < min_clc_version)
-    {
-        log_error("The minimum required OpenCL C version for API version %s is "
-                  "%s (got %s)!\n",
-                  api_version.to_string().c_str(),
-                  min_clc_version.to_string().c_str(),
-                  clc_version.to_string().c_str());
-        return TEST_FAIL;
-    }
-    log_info("  testing compilation based on CL_DEVICE_OPENCL_C_VERSION\n");
-    struct TestCase
-    {
-        Version version;
-        const char* buildOptions;
-    };
-    std::vector<TestCase> tests;
-    tests.push_back({ Version(1, 1), "-cl-std=CL1.1" });
-    tests.push_back({ Version(1, 2), "-cl-std=CL1.2" });
-    tests.push_back({ Version(2, 0), "-cl-std=CL2.0" });
-    tests.push_back({ Version(3, 0), "-cl-std=CL3.0" });
-    for (const auto& testcase : tests)
-    {
-        if (clc_version >= testcase.version)
-        {
-            clProgramWrapper program;
-            cl_int error =
-                create_single_kernel_helper_create_program_for_device(
-                    context, device, &program, 1, &test_kernel,
-                    testcase.buildOptions);
-            test_error(error, "Unable to build program!");
-            log_info("    successfully built program with build options '%s'\n",
-                     testcase.buildOptions);
-        }
-    }
-    return TEST_PASS;
-// This sub-test checks that CL_DEVICE_OPENCL_C_ALL_VERSIONS includes any
-// requirements for the API version, and that programs can be built for all
-// reported versions.
-static int test_CL_DEVICE_OPENCL_C_ALL_VERSIONS(cl_device_id device,
-                                                cl_context context)
-    // For now, the required OpenCL C version is the same as the API version.
-    const Version api_version = get_device_cl_version(device);
-    bool found_api_version = false;
-    log_info(
-        "  testing compilation based on CL_DEVICE_OPENCL_C_ALL_VERSIONS\n");
-    cl_int error = CL_SUCCESS;
-    size_t sz = 0;
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, NULL, &sz);
-    test_error(error, "Unable to query CL_DEVICE_OPENCL_C_ALL_VERSIONS size");
-    std::vector<cl_name_version> clc_versions(sz / sizeof(cl_name_version));
-    error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, sz,
-                  , NULL);
-    test_error(error, "Unable to query CL_DEVICE_OPENCL_C_FEATURES");
-    for (const auto& clc_version : clc_versions)
-    {
-        const unsigned major = CL_VERSION_MAJOR(clc_version.version);
-        const unsigned minor = CL_VERSION_MINOR(clc_version.version);
-        if (strcmp(, "OpenCL C") == 0)
-        {
-            if (api_version == Version(major, minor))
-            {
-                found_api_version = true;
-            }
-            if (major == 1 && minor == 0)
-            {
-                log_info(
-                    "    skipping OpenCL C 1.0, there is no -cl-std=CL1.0.\n");
-                continue;
-            }
-            std::string buildOptions = "-cl-std=CL";
-            buildOptions += std::to_string(major);
-            buildOptions += ".";
-            buildOptions += std::to_string(minor);
-            clProgramWrapper program;
-            error = create_single_kernel_helper_create_program_for_device(
-                context, device, &program, 1, &test_kernel,
-                buildOptions.c_str());
-            test_error(error, "Unable to build program!");
-            log_info("    successfully built program with build options '%s'\n",
-                     buildOptions.c_str());
-        }
-        else
-        {
-            log_error("    unknown OpenCL C name '%s'.\n",;
-            return TEST_FAIL;
-        }
-    }
-    if (!found_api_version)
-    {
-        log_error("    didn't find required OpenCL C version '%s'!\n",
-                  api_version.to_string().c_str());
-        return TEST_FAIL;
-    }
-    return TEST_PASS;
-// This sub-test checks that any required features are present for a specific
-static int test_CL_DEVICE_OPENCL_C_VERSION_features(cl_device_id device,
-                                                    cl_context context)
-    log_info("  testing for OPENCL_C_VERSION required features\n");
-    OpenCLCFeatures features;
-    int error = get_device_cl_c_features(device, features);
-    if (error)
-    {
-        log_error("Couldn't query OpenCL C features for the device!\n");
-        return TEST_FAIL;
-    }
-    const Version clc_version = get_device_cl_c_version(device);
-    if (clc_version >= Version(2, 0))
-    {
-        bool has_all_OpenCL_C_20_features =
-            features.supports__opencl_c_atomic_order_acq_rel
-            && features.supports__opencl_c_atomic_order_seq_cst
-            && features.supports__opencl_c_atomic_scope_device
-            && features.supports__opencl_c_atomic_scope_all_devices
-            && features.supports__opencl_c_device_enqueue
-            && features.supports__opencl_c_generic_address_space
-            && features.supports__opencl_c_pipes
-            && features.supports__opencl_c_program_scope_global_variables
-            && features.supports__opencl_c_work_group_collective_functions;
-        if (features.supports__opencl_c_images)
-        {
-            has_all_OpenCL_C_20_features = has_all_OpenCL_C_20_features
-                && features.supports__opencl_c_3d_image_writes
-                && features.supports__opencl_c_read_write_images;
-        }
-        test_assert_error(
-            has_all_OpenCL_C_20_features,
-            "At least one required OpenCL C 2.0 feature is missing!");
-    }
-    return TEST_PASS;
-// This sub-test checks that all required OpenCL C versions are present for a
-static int test_CL_DEVICE_OPENCL_C_VERSION_versions(cl_device_id device,
-                                                    cl_context context)
-    log_info("  testing for OPENCL_C_VERSION required versions\n");
-    const Version device_clc_version = get_device_cl_c_version(device);
-    std::vector<Version> test_clc_versions;
-    test_clc_versions.push_back(Version(1, 0));
-    test_clc_versions.push_back(Version(1, 1));
-    test_clc_versions.push_back(Version(1, 2));
-    test_clc_versions.push_back(Version(2, 0));
-    test_clc_versions.push_back(Version(3, 0));
-    cl_int error = CL_SUCCESS;
-    size_t sz = 0;
-    error =
-        clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, NULL, &sz);
-    test_error(error, "Unable to query CL_DEVICE_OPENCL_C_ALL_VERSIONS size");
-    std::vector<cl_name_version> device_clc_versions(sz
-                                                     / sizeof(cl_name_version));
-    error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, sz,
-                  , NULL);
-    test_error(error, "Unable to query CL_DEVICE_OPENCL_C_FEATURES");
-    for (const auto& test_clc_version : test_clc_versions)
-    {
-        if (device_clc_version >= test_clc_version)
-        {
-            bool found = false;
-            for (const auto& check : device_clc_versions)
-            {
-                const unsigned major = CL_VERSION_MAJOR(check.version);
-                const unsigned minor = CL_VERSION_MINOR(check.version);
-                if (strcmp(, "OpenCL C") == 0
-                    && test_clc_version == Version(major, minor))
-                {
-                    found = true;
-                    break;
-                }
-            }
-            if (found)
-            {
-                log_info("    found OpenCL C version '%s'\n",
-                         test_clc_version.to_string().c_str());
-            }
-            else
-            {
-                log_error("Didn't find OpenCL C version '%s'!\n",
-                          test_clc_version.to_string().c_str());
-                return TEST_FAIL;
-            }
-        }
-    }
-    return TEST_PASS;
-int test_opencl_c_versions(cl_device_id device, cl_context context,
-                           cl_command_queue queue, int num_elements)
-    check_compiler_available(device);
-    const Version version = get_device_cl_version(device);
-    int result = TEST_PASS;
-    result |= test_CL_DEVICE_OPENCL_C_VERSION(device, context);
-    if (version >= Version(3, 0))
-    {
-        result |= test_CL_DEVICE_OPENCL_C_ALL_VERSIONS(device, context);
-        result |= test_CL_DEVICE_OPENCL_C_VERSION_features(device, context);
-        result |= test_CL_DEVICE_OPENCL_C_VERSION_versions(device, context);
-    }
-    return result;
diff --git a/test_conformance/compiler/test_pragma_unroll.cpp b/test_conformance/compiler/test_pragma_unroll.cpp
index 67f4b93..3a5cbf9 100644
--- a/test_conformance/compiler/test_pragma_unroll.cpp
+++ b/test_conformance/compiler/test_pragma_unroll.cpp
@@ -258,13 +258,9 @@
   for (size_t kernelIdx = 0; kernelIdx < KERNEL_NUM; ++kernelIdx) {
     clProgramWrapper program;
     clKernelWrapper kernel;
-    if (create_single_kernel_helper(
-            context, &program, &kernel, 1,
-            (const char **)&pragma_unroll_kernels[kernelIdx], "pragma_unroll"))
-    {
-        log_error("The program we attempted to compile was: \n%s\n",
-                  pragma_unroll_kernels[kernelIdx]);
-        return -1;
+    if( create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, (const char **)&pragma_unroll_kernels[kernelIdx], "pragma_unroll", "-cl-std=CL2.0" ) ) {
+      log_error("The program we attempted to compile was: \n%s\n", pragma_unroll_kernels[kernelIdx]);
+      return -1;
     clMemWrapper buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, ELEMENT_NUM * sizeof(cl_uint), NULL, &error);
diff --git a/test_conformance/compiler/test_unload_platform_compiler.cpp b/test_conformance/compiler/test_unload_platform_compiler.cpp
deleted file mode 100644
index 039d472..0000000
--- a/test_conformance/compiler/test_unload_platform_compiler.cpp
+++ /dev/null
@@ -1,980 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "testBase.h"
-#include "test_unload_platform_compiler_resources.hpp"
-#include <cassert>
-#include <chrono>
-#include <functional>
-#include <future>
-#include <initializer_list>
-#include <stdexcept>
-#include <string>
-#include <thread>
-#include <vector>
-namespace {
-class unload_test_failure : public std::runtime_error {
-    using std::runtime_error::runtime_error;
-    explicit unload_test_failure(const std::string &function, cl_int error)
-        : std::runtime_error(function + " == " + std::to_string(error))
-    {}
-class build_base {
-    build_base(cl_context context, cl_device_id device)
-        : m_context{ context }, m_device{ device }
-    {}
-    virtual ~build_base() { reset(); }
-    build_base(const build_base &) = delete;
-    build_base &operator=(const build_base &) = delete;
-    virtual void create() = 0;
-    virtual void compile()
-    {
-        assert(nullptr != m_program);
-        const cl_int err = clCompileProgram(m_program, 1, &m_device, nullptr, 0,
-                                            nullptr, nullptr, nullptr, nullptr);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCompileProgram()", err);
-    }
-    virtual void link()
-    {
-        assert(nullptr != m_program);
-        cl_int err = CL_INVALID_PLATFORM;
-        m_executable = clLinkProgram(m_context, 1, &m_device, nullptr, 1,
-                                     &m_program, nullptr, nullptr, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clLinkProgram()", err);
-        if (nullptr == m_executable)
-            throw unload_test_failure("clLinkProgram returned nullptr");
-    }
-    virtual void verify()
-    {
-        assert(nullptr != m_executable);
-        cl_int err = CL_INVALID_VALUE;
-        const clKernelWrapper kernel =
-            clCreateKernel(m_executable, "write_kernel", &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCreateKernel()", err);
-        const clCommandQueueWrapper queue =
-            clCreateCommandQueue(m_context, m_device, 0, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCreateCommandQueue()", err);
-        const clMemWrapper buffer = clCreateBuffer(
-            m_context, CL_MEM_READ_WRITE, sizeof(cl_uint), nullptr, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCreateBuffer()", err);
-        cl_uint value = 0;
-        err = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clSetKernelArg()", err);
-        static const size_t work_size = 1;
-        err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &work_size,
-                                     nullptr, 0, nullptr, nullptr);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clEnqueueNDRangeKernel()", err);
-        err = clEnqueueReadBuffer(queue, buffer, CL_BLOCKING, 0,
-                                  sizeof(cl_uint), &value, 0, nullptr, nullptr);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clEnqueueReadBuffer()", err);
-        err = clFinish(queue);
-        if (CL_SUCCESS != err) throw unload_test_failure("clFinish()", err);
-        if (42 != value)
-        {
-            throw unload_test_failure("Kernel wrote " + std::to_string(value)
-                                      + ", expected 42");
-        }
-    }
-    void reset()
-    {
-        if (m_program)
-        {
-            clReleaseProgram(m_program);
-            m_program = nullptr;
-        }
-        if (m_executable)
-        {
-            clReleaseProgram(m_executable);
-            m_executable = nullptr;
-        }
-    }
-    void build()
-    {
-        compile();
-        link();
-    }
-    const cl_context m_context;
-    const cl_device_id m_device;
-    cl_program m_program{};
-    cl_program m_executable{};
- * @brief initializer_list type for constructing loops over build tests.
- */
-using build_list = std::initializer_list<std::reference_wrapper<build_base>>;
-class build_with_source : public build_base {
-    using build_base::build_base;
-    void create() final
-    {
-        assert(nullptr == m_program);
-        static const char *sources[] = { write_kernel_source };
-        cl_int err = CL_INVALID_PLATFORM;
-        m_program =
-            clCreateProgramWithSource(m_context, 1, sources, nullptr, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCreateProgramWithSource()", err);
-        if (nullptr == m_program)
-            throw unload_test_failure(
-                "clCreateProgramWithSource returned nullptr");
-    }
-class build_with_binary : public build_base {
-    build_with_binary(const cl_context context, const cl_device_id device,
-                      const std::vector<unsigned char> &binary)
-        : build_base{ context, device }, m_binary{ binary }
-    {}
-    build_with_binary(const cl_context context, const cl_device_id device)
-        : build_base{ context, device }
-    {
-        cl_int err = CL_INVALID_VALUE;
-        /* Build the program from source */
-        static const char *sources[] = { write_kernel_source };
-        clProgramWrapper program =
-            clCreateProgramWithSource(m_context, 1, sources, nullptr, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCreateProgramWithSource()", err);
-        err = clCompileProgram(program, 1, &m_device, nullptr, 0, nullptr,
-                               nullptr, nullptr, nullptr);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCompileProgram()", err);
-        const clProgramWrapper executable =
-            clLinkProgram(m_context, 1, &m_device, nullptr, 1, &program,
-                          nullptr, nullptr, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clLinkProgram()", err);
-        size_t binary_size;
-        err = clGetProgramInfo(executable, CL_PROGRAM_BINARY_SIZES,
-                               sizeof(binary_size), &binary_size, nullptr);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clGetProgramInfo()", err);
-        m_binary.resize(binary_size);
-        /* Grab the program binary */
-        unsigned char *binaries[] = { };
-        err = clGetProgramInfo(executable, CL_PROGRAM_BINARIES,
-                               sizeof(unsigned char *), binaries, nullptr);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clGetProgramInfo()", err);
-    }
-    void create() final
-    {
-        assert(nullptr == m_executable);
-        const unsigned char *binaries[] = { };
-        const size_t binary_sizes[] = { m_binary.size() };
-        cl_int err = CL_INVALID_PLATFORM;
-        m_executable = clCreateProgramWithBinary(
-            m_context, 1, &m_device, binary_sizes, binaries, nullptr, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCreateProgramWithBinary()", err);
-        if (nullptr == m_executable)
-            throw unload_test_failure(
-                "clCreateProgramWithBinary returned nullptr");
-    }
-    void compile() final
-    {
-        assert(nullptr != m_executable);
-        /* Program created from binary, there is nothing to do */
-    }
-    void link() final
-    {
-        assert(nullptr != m_executable);
-        const cl_int err = clBuildProgram(m_executable, 1, &m_device, nullptr,
-                                          nullptr, nullptr);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clBuildProgram()", err);
-    }
-    std::vector<unsigned char> m_binary;
-class build_with_il : public build_base {
-    build_with_il(const cl_context context, const cl_platform_id platform,
-                  const cl_device_id device)
-        : build_base{ context, device }
-    {
-        /* Disable build_with_il if neither core nor extension functionality is
-         * available */
-        m_enabled = false;
-        Version version = get_device_cl_version(device);
-        if (version >= Version(2, 1))
-        {
-            std::string sILVersion = get_device_il_version_string(device);
-            if (version < Version(3, 0) || !sILVersion.empty())
-            {
-                m_enabled = true;
-            }
-            m_CreateProgramWithIL = clCreateProgramWithIL;
-        }
-        else if (is_extension_available(device, "cl_khr_il_program"))
-        {
-            m_CreateProgramWithIL = (decltype(m_CreateProgramWithIL))
-                clGetExtensionFunctionAddressForPlatform(
-                    platform, "clCreateProgramWithILKHR");
-            if (nullptr == m_CreateProgramWithIL)
-            {
-                throw unload_test_failure("cl_khr_il_program supported, but "
-                                          "function address is nullptr");
-            }
-            m_enabled = true;
-        }
-        cl_uint address_bits{};
-        const cl_int err =
-            clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint),
-                            &address_bits, nullptr);
-        if (CL_SUCCESS != err)
-        {
-            throw unload_test_failure("Failure getting device address bits");
-        }
-        switch (address_bits)
-        {
-            case 32:
-                m_spirv_binary =;
-                m_spirv_size = write_kernel_32_spv.size();
-                break;
-            case 64:
-                m_spirv_binary =;
-                m_spirv_size = write_kernel_64_spv.size();
-                break;
-            default: throw unload_test_failure("Invalid address bits");
-        }
-    }
-    void create() final
-    {
-        if (!m_enabled) return;
-        assert(nullptr == m_program);
-        cl_int err = CL_INVALID_PLATFORM;
-        m_program = m_CreateProgramWithIL(m_context, m_spirv_binary,
-                                          m_spirv_size, &err);
-        if (CL_SUCCESS != err)
-            throw unload_test_failure("clCreateProgramWithIL()", err);
-        if (nullptr == m_program)
-            throw unload_test_failure("clCreateProgramWithIL returned nullptr");
-    }
-    void compile() final
-    {
-        if (!m_enabled) return;
-        build_base::compile();
-    }
-    void link() final
-    {
-        if (!m_enabled) return;
-        build_base::link();
-    }
-    void verify() final
-    {
-        if (!m_enabled) return;
-        build_base::verify();
-    }
-    void *m_spirv_binary;
-    size_t m_spirv_size;
-    bool m_enabled;
-    using CreateProgramWithIL_fn = decltype(&clCreateProgramWithIL);
-    CreateProgramWithIL_fn m_CreateProgramWithIL;
-static cl_platform_id device_platform(cl_device_id device)
-    cl_platform_id platform;
-    const cl_int err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
-                                       sizeof(platform), &platform, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Failure getting platform of tested device\n");
-        return nullptr;
-    }
-    return platform;
-static void unload_platform_compiler(const cl_platform_id platform)
-    const cl_int err = clUnloadPlatformCompiler(platform);
-    if (CL_SUCCESS != err)
-        throw unload_test_failure("clUnloadPlatformCompiler()", err);
-/* Test calling the function with a valid platform */
-int test_unload_valid(cl_device_id device, cl_context, cl_command_queue, int)
-    const cl_platform_id platform = device_platform(device);
-    const long int err = clUnloadPlatformCompiler(platform);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clUnloadPlatformCompiler() == %ld\n", err);
-        return 1;
-    }
-    return 0;
-/* Test calling the function with invalid platform */
-int test_unload_invalid(cl_device_id, cl_context, cl_command_queue, int)
-    const long int err = clUnloadPlatformCompiler(nullptr);
-    if (CL_INVALID_PLATFORM != err)
-    {
-        log_error("Test failure: clUnloadPlatformCompiler() == %ld\n", err);
-        return 1;
-    }
-    return 0;
-/* Test calling the function multiple times in a row */
-int test_unload_repeated(cl_device_id device, cl_context context,
-                         cl_command_queue, int)
-    check_compiler_available(device);
-    const cl_platform_id platform = device_platform(device);
-    try
-    {
-        build_with_source source(context, device);
-        build_with_binary binary(context, device);
-        build_with_il il(context, platform, device);
-        for (build_base &test : build_list{ source, binary, il })
-        {
-            unload_platform_compiler(platform);
-            unload_platform_compiler(platform);
-            test.create();
-  ;
-            test.verify();
-        }
-    } catch (const unload_test_failure &e)
-    {
-        log_error("Test failure: %s\n", e.what());
-        return 1;
-    }
-    return 0;
-/* Test calling the function between compilation and linking of programs */
-int test_unload_compile_unload_link(cl_device_id device, cl_context context,
-                                    cl_command_queue, int)
-    check_compiler_available(device);
-    const cl_platform_id platform = device_platform(device);
-    try
-    {
-        build_with_source source(context, device);
-        build_with_binary binary(context, device);
-        build_with_il il(context, platform, device);
-        for (build_base &test : build_list{ source, binary, il })
-        {
-            unload_platform_compiler(platform);
-            test.create();
-            test.compile();
-            unload_platform_compiler(platform);
-  ;
-            test.verify();
-        }
-    } catch (const unload_test_failure &e)
-    {
-        log_error("Test failure: %s\n", e.what());
-        return 1;
-    }
-    return 0;
-/* Test calling the function between program build and kernel creation */
-int test_unload_build_unload_create_kernel(cl_device_id device,
-                                           cl_context context, cl_command_queue,
-                                           int)
-    check_compiler_available(device);
-    const cl_platform_id platform = device_platform(device);
-    try
-    {
-        build_with_source source(context, device);
-        build_with_binary binary(context, device);
-        build_with_il il(context, platform, device);
-        for (build_base &test : build_list{ source, binary, il })
-        {
-            unload_platform_compiler(platform);
-            test.create();
-  ;
-            unload_platform_compiler(platform);
-            test.verify();
-        }
-    } catch (const unload_test_failure &e)
-    {
-        log_error("Test failure: %s\n", e.what());
-        return 1;
-    }
-    return 0;
-/* Test linking together two programs that were built with a call to the unload
- * function in between */
-int test_unload_link_different(cl_device_id device, cl_context context,
-                               cl_command_queue, int)
-    check_compiler_available(device);
-    const cl_platform_id platform = device_platform(device);
-    static const char *sources_1[] = { "unsigned int a() { return 42; }" };
-    static const char *sources_2[] = { R"(
-		unsigned int a();
-		kernel void test(global unsigned int *p)
-		{
-			*p = a();
-		})" };
-    cl_int err = CL_INVALID_PLATFORM;
-    /* Create and compile program 1 */
-    const clProgramWrapper program_1 =
-        clCreateProgramWithSource(context, 1, sources_1, nullptr, &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCreateProgramWithSource() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    err = clCompileProgram(program_1, 1, &device, nullptr, 0, nullptr, nullptr,
-                           nullptr, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCompileProgram() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    /* Unload the platform compiler */
-    err = clUnloadPlatformCompiler(platform);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clUnloadPlatformCompiler() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    /* Create and compile program 2 with the new compiler context */
-    const clProgramWrapper program_2 =
-        clCreateProgramWithSource(context, 1, sources_2, nullptr, &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCreateProgramWithSource() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    err = clCompileProgram(program_2, 1, &device, nullptr, 0, nullptr, nullptr,
-                           nullptr, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCompileProgram() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    /* Link the two programs into an executable program */
-    const cl_program compiled_programs[] = { program_1, program_2 };
-    const clProgramWrapper executable =
-        clLinkProgram(context, 1, &device, nullptr, 2, compiled_programs,
-                      nullptr, nullptr, &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clLinkProgram() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    /* Verify execution of a kernel from the linked executable */
-    const clKernelWrapper kernel = clCreateKernel(executable, "test", &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCreateKernel() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    const clCommandQueueWrapper queue =
-        clCreateCommandQueue(context, device, 0, &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCreateCommandQueue() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    const clMemWrapper buffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                               sizeof(cl_uint), nullptr, &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCreateBuffer() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    cl_uint value = 0;
-    err = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clSetKernelArg() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    static const size_t work_size = 1;
-    err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &work_size, nullptr,
-                                 0, nullptr, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clEnqueueNDRangeKernel() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    err = clEnqueueReadBuffer(queue, buffer, CL_BLOCKING, 0, sizeof(cl_uint),
-                              &value, 0, nullptr, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clEnqueueReadBuffer() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    err = clFinish(queue);
-    if (CL_SUCCESS != err) throw unload_test_failure("clFinish()", err);
-    if (42 != value)
-    {
-        log_error("Test failure: Kernel wrote %lu, expected 42)\n",
-                  static_cast<long unsigned>(value));
-        return 1;
-    }
-    return 0;
-/* Test calling the function in a thread while others threads are building
- * programs */
-int test_unload_build_threaded(cl_device_id device, cl_context context,
-                               cl_command_queue, int)
-    using clock = std::chrono::steady_clock;
-    check_compiler_available(device);
-    const cl_platform_id platform = device_platform(device);
-    const auto end = clock::now() + std::chrono::seconds(5);
-    const auto unload_thread = [&end, platform] {
-        bool success = true;
-        /* Repeatedly unload the compiler */
-        try
-        {
-            while (clock::now() < end)
-            {
-                unload_platform_compiler(platform);
-            }
-        } catch (const unload_test_failure &e)
-        {
-            log_error("Test failure: %s\n", e.what());
-            success = false;
-        }
-        return success;
-    };
-    const auto build_thread = [&end](build_base *build) {
-        bool success = true;
-        try
-        {
-            while (clock::now() < end)
-            {
-                build->create();
-                build->build();
-                build->verify();
-                build->reset();
-            }
-        } catch (unload_test_failure &e)
-        {
-            log_error("Test failure: %s\n", e.what());
-            success = false;
-        }
-        return success;
-    };
-    build_with_source build_source(context, device);
-    build_with_binary build_binary(context, device);
-    build_with_il build_il(context, platform, device);
-    /* Run all threads in parallel and wait for them to finish */
-    std::future<bool> unload_result =
-        std::async(std::launch::async, unload_thread);
-    std::future<bool> build_source_result =
-        std::async(std::launch::async, build_thread, &build_source);
-    std::future<bool> build_binary_result =
-        std::async(std::launch::async, build_thread, &build_binary);
-    std::future<bool> build_il_result =
-        std::async(std::launch::async, build_thread, &build_il);
-    bool success = true;
-    if (!unload_result.get())
-    {
-        log_error("unload_thread failed\n");
-        success = false;
-    }
-    if (!build_source_result.get())
-    {
-        log_error("build_with_source failed\n");
-        success = false;
-    }
-    if (!build_binary_result.get())
-    {
-        log_error("build_with_binary failed\n");
-        success = false;
-    }
-    if (!build_il_result.get())
-    {
-        log_error("build_with_il failed\n");
-        success = false;
-    }
-    return success ? 0 : 1;
-/* Test grabbing program build information after calling the unload function */
-int test_unload_build_info(cl_device_id device, cl_context context,
-                           cl_command_queue, int)
-    check_compiler_available(device);
-    const cl_platform_id platform = device_platform(device);
-    static const char *sources[] = { write_kernel_source };
-    cl_int err = CL_INVALID_PLATFORM;
-    /* Create and build the initial program from source */
-    const clProgramWrapper program =
-        clCreateProgramWithSource(context, 1, sources, nullptr, &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCreateProgramWithSource() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    static const std::string options("-Dtest");
-    err =
-        clBuildProgram(program, 1, &device, options.c_str(), nullptr, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCompileProgram() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    /* Unload the compiler */
-    err = clUnloadPlatformCompiler(platform);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clUnloadPlatformCompiler() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    std::vector<cl_program_build_info> infos{ CL_PROGRAM_BUILD_STATUS,
-                                              CL_PROGRAM_BUILD_OPTIONS,
-                                              CL_PROGRAM_BUILD_LOG,
-                                              CL_PROGRAM_BINARY_TYPE };
-    if (get_device_cl_version(device) >= Version(2, 0))
-    {
-    }
-    /* Try grabbing the infos after the compiler unload */
-    for (cl_program_build_info info : infos)
-    {
-        size_t info_size = 0;
-        err = clGetProgramBuildInfo(program, device, info, 0, nullptr,
-                                    &info_size);
-        if (CL_SUCCESS != err)
-        {
-            log_error("Test failure: clGetProgramBuildInfo() == %ld\n",
-                      static_cast<long int>(err));
-            return 1;
-        }
-        std::vector<char> info_value(info_size);
-        size_t written_size = 0;
-        err = clGetProgramBuildInfo(program, device, info, info_size,
-                                    &info_value[0], &written_size);
-        if (CL_SUCCESS != err)
-        {
-            log_error("Test failure: clGetProgramBuildInfo() == %ld\n",
-                      static_cast<long int>(err));
-            return 1;
-        }
-        else if (written_size != info_size)
-        {
-            log_error("Test failure: Written info value size (%zu) was "
-                      "different from "
-                      "queried size (%zu).\n",
-                      written_size, info_size);
-            return 1;
-        }
-        /* Verify the information we know the answer to */
-        switch (info)
-        {
-            case CL_PROGRAM_BUILD_STATUS: {
-                constexpr size_t value_size = sizeof(cl_build_status);
-                if (value_size != info_size)
-                {
-                    log_error("Test failure: Expected CL_PROGRAM_BUILD_STATUS "
-                              "of size %zu, "
-                              "but got %zu\n",
-                              value_size, info_size);
-                    return 1;
-                }
-                cl_build_status value;
-                memcpy(&value, &info_value[0], value_size);
-                if (CL_BUILD_SUCCESS != value)
-                {
-                    log_error(
-                        "Test failure: CL_PROGRAM_BUILD_STATUS did not return "
-                        "CL_BUILD_SUCCESS (%ld), but %ld\n",
-                        static_cast<long int>(CL_BUILD_SUCCESS),
-                        static_cast<long int>(value));
-                    return 1;
-                }
-            }
-            break;
-            case CL_PROGRAM_BUILD_OPTIONS: {
-                const size_t value_size = options.length() + 1;
-                if (value_size != info_size)
-                {
-                    log_error("Test failure: Expected CL_PROGRAM_BUILD_OPTIONS "
-                              "of size "
-                              "%zu, but got %zu\n",
-                              value_size, info_size);
-                    return 1;
-                }
-                else if (options != &info_value[0])
-                {
-                    log_error("Test failure: CL_PROGRAM_BUILD_OPTIONS returned "
-                              "\"%s\" "
-                              "instead of \"%s\"\n",
-                              &info_value[0], options.c_str());
-                    return 1;
-                }
-            }
-            break;
-            case CL_PROGRAM_BINARY_TYPE: {
-                constexpr size_t value_size = sizeof(cl_program_binary_type);
-                if (value_size != info_size)
-                {
-                    log_error("Test failure: Expected CL_PROGRAM_BINARY_TYPE "
-                              "of size %zu, "
-                              "but got %zu\n",
-                              value_size, info_size);
-                    return 1;
-                }
-                cl_program_binary_type value;
-                memcpy(&value, &info_value[0], value_size);
-                if (CL_PROGRAM_BINARY_TYPE_EXECUTABLE != value)
-                {
-                    log_error(
-                        "Test failure: CL_PROGRAM_BINARY_TYPE did not return "
-                        "CL_PROGRAM_BINARY_TYPE_EXECUTABLE (%ld), but %ld\n",
-                        static_cast<long int>(
-                            CL_PROGRAM_BINARY_TYPE_EXECUTABLE),
-                        static_cast<long int>(value));
-                    return 1;
-                }
-            }
-            break;
-        }
-    }
-    return 0;
-/* Test calling the unload function between program building and fetching the
- * program binaries */
-int test_unload_program_binaries(cl_device_id device, cl_context context,
-                                 cl_command_queue, int)
-    check_compiler_available(device);
-    const cl_platform_id platform = device_platform(device);
-    static const char *sources[] = { write_kernel_source };
-    cl_int err = CL_INVALID_PLATFORM;
-    /* Create and build the initial program from source */
-    const clProgramWrapper program =
-        clCreateProgramWithSource(context, 1, sources, nullptr, &err);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCreateProgramWithSource() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clCompileProgram() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    /* Unload the compiler */
-    err = clUnloadPlatformCompiler(platform);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clUnloadPlatformCompiler() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    /* Grab the built executable binary after the compiler unload */
-    size_t binary_size;
-    err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
-                           sizeof(binary_size), &binary_size, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clGetProgramInfo() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    std::vector<unsigned char> binary(binary_size);
-    unsigned char *binaries[] = { };
-    err = clGetProgramInfo(program, CL_PROGRAM_BINARIES,
-                           sizeof(unsigned char *), binaries, nullptr);
-    if (CL_SUCCESS != err)
-    {
-        log_error("Test failure: clGetProgramInfo() == %ld\n",
-                  static_cast<long int>(err));
-        return 1;
-    }
-    /* Create a new program from the binary and test its execution */
-    try
-    {
-        build_with_binary build_binary(context, device, binary);
-        build_binary.create();
-        build_binary.verify();
-    } catch (unload_test_failure &e)
-    {
-        log_error("Test failure: %s\n", e.what());
-        return 1;
-    }
-    return 0;
diff --git a/test_conformance/compiler/test_unload_platform_compiler_resources.hpp b/test_conformance/compiler/test_unload_platform_compiler_resources.hpp
deleted file mode 100644
index 82f87ff..0000000
--- a/test_conformance/compiler/test_unload_platform_compiler_resources.hpp
+++ /dev/null
@@ -1,50 +0,0 @@
-#include <array>
-static const char write_kernel_source[] = R"(
-	kernel void write_kernel(global unsigned int *p) {
-		*p = 42;
-	})";
-/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm64 */
-static std::array<unsigned char, 216> write_kernel_64_spv{
-    { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00,
-      0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00,
-      0x0e, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-      0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c,
-      0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
-      0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00,
-      0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
-      0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
-      0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
-      0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00,
-      0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 }
-/* Assembled SPIR-V 1.0 binary from write_kernel.spvasm32 */
-static std::array<unsigned char, 216> write_kernel_32_spv{
-    { 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00,
-      0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00,
-      0x0e, 0x00, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x0f, 0x00, 0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-      0x77, 0x72, 0x69, 0x74, 0x65, 0x5f, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c,
-      0x00, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
-      0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00,
-      0x13, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
-      0x05, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x21, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x05, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x04, 0x00, 0x00, 0x00,
-      0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
-      0x37, 0x00, 0x03, 0x00, 0x05, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
-      0xf8, 0x00, 0x02, 0x00, 0x08, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x05, 0x00,
-      0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-      0x04, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00 }
diff --git a/test_conformance/compiler/write_kernel.spvasm32 b/test_conformance/compiler/write_kernel.spvasm32
deleted file mode 100644
index b6a3fc1..0000000
--- a/test_conformance/compiler/write_kernel.spvasm32
+++ /dev/null
@@ -1,24 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 11
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-;         %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %6 "write_kernel"
-;        %10 = OpString "kernel_arg_type.write_kernel.uint*,"
-;              OpSource OpenCL_C 200000
-;              OpDecorate %7 FuncParamAttr NoCapture
-       %uint = OpTypeInt 32 0
-    %uint_42 = OpConstant %uint 42
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
-          %5 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-          %6 = OpFunction %void None %5
-          %7 = OpFunctionParameter %_ptr_CrossWorkgroup_uint
-          %8 = OpLabel
-               OpStore %7 %uint_42 Aligned 4
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/compiler/write_kernel.spvasm64 b/test_conformance/compiler/write_kernel.spvasm64
deleted file mode 100644
index 0923bc1..0000000
--- a/test_conformance/compiler/write_kernel.spvasm64
+++ /dev/null
@@ -1,24 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 11
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-;         %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %6 "write_kernel"
-;        %10 = OpString "kernel_arg_type.write_kernel.uint*,"
-;              OpSource OpenCL_C 200000
-;              OpDecorate %7 FuncParamAttr NoCapture
-       %uint = OpTypeInt 32 0
-    %uint_42 = OpConstant %uint 42
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
-          %5 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-          %6 = OpFunction %void None %5
-          %7 = OpFunctionParameter %_ptr_CrossWorkgroup_uint
-          %8 = OpLabel
-               OpStore %7 %uint_42 Aligned 4
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/computeinfo/CMakeLists.txt b/test_conformance/computeinfo/CMakeLists.txt
index 207223a..c952b17 100644
--- a/test_conformance/computeinfo/CMakeLists.txt
+++ b/test_conformance/computeinfo/CMakeLists.txt
@@ -2,9 +2,7 @@
-        device_uuid.cpp
-        conforming_version.cpp
diff --git a/test_conformance/computeinfo/conforming_version.cpp b/test_conformance/computeinfo/conforming_version.cpp
deleted file mode 100644
index 624cf85..0000000
--- a/test_conformance/computeinfo/conforming_version.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include <regex>
-#include "harness/testHarness.h"
-#include "harness/deviceInfo.h"
-int test_conformance_version(cl_device_id deviceID, cl_context context,
-                             cl_command_queue ignoreQueue, int num_elements)
-    std::string version_string{ get_device_info_string(
-    // Latest conformance version passed should match vYYYY-MM-DD-XX, where XX
-    // is a number
-    std::regex valid_format("^v\\d{4}-(((0)[1-9])|((1)[0-2]))-((0)[1-9]|[1-2]["
-                            "0-9]|(3)[0-1])-\\d{2}$");
-    test_assert_error(
-        std::regex_match(version_string, valid_format),
-        "valid format vYYYY-MM-DD-XX");
-    return TEST_PASS;
diff --git a/test_conformance/computeinfo/device_uuid.cpp b/test_conformance/computeinfo/device_uuid.cpp
deleted file mode 100644
index 1ef9dad..0000000
--- a/test_conformance/computeinfo/device_uuid.cpp
+++ /dev/null
@@ -1,211 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "harness/compat.h"
-#include <array>
-#include <bitset>
-#include "harness/testHarness.h"
-#include "harness/deviceInfo.h"
-using uuid = std::array<cl_uchar, CL_UUID_SIZE_KHR>;
-using luid = std::array<cl_uchar, CL_LUID_SIZE_KHR>;
-template <typename T> static void log_info_uuid(const T &id)
-    for (const cl_uchar c : id)
-    {
-        log_info("%02x", static_cast<unsigned>(c));
-    }
-template <typename T> static void log_error_uuid(const T &id)
-    for (const cl_uchar c : id)
-    {
-        log_error("%02x", static_cast<unsigned>(c));
-    }
-static bool check_device_info_returns(const cl_int err, const size_t size,
-                                      const size_t expected_size)
-    if (err != CL_SUCCESS)
-    {
-        print_error(err, "clGetDeviceInfo failed");
-        return false;
-    }
-    else if (size != expected_size)
-    {
-        log_error("Invalid size written by clGetDeviceInfo (%zu != %zu)\n",
-                  size, expected_size);
-        return false;
-    }
-    return true;
-template <typename T>
-static bool get_uuid(const cl_device_id device, const cl_device_info info,
-                     T &id, const bool twice = true)
-    const size_t id_size = id.size() * sizeof(id[0]);
-    size_t size_ret;
-    cl_int err = clGetDeviceInfo(device, info, id_size,, &size_ret);
-    if (!check_device_info_returns(err, size_ret, id_size))
-    {
-        return false;
-    }
-    /* Check that IDs are (at the very least) stable across two successive
-     * clGetDeviceInfo calls. Check conditionally, as it is undefined what the
-     * query for CL_DEVICE_LUID_KHR returns if CL_DEVICE_LUID_VALID_KHR returns
-     * false. */
-    if (twice)
-    {
-        T id_2;
-        size_t size_ret_2;
-        err = clGetDeviceInfo(device, info, id_size,, &size_ret_2);
-        if (!check_device_info_returns(err, size_ret_2, id_size))
-        {
-            return false;
-        }
-        if (id != id_2)
-        {
-            log_error("Got different IDs from the same ID device info (");
-            log_error_uuid(id);
-            log_error(" != ");
-            log_error_uuid(id_2);
-            log_error(")\n");
-            return false;
-        }
-    }
-    return true;
-int test_device_uuid(cl_device_id deviceID, cl_context context,
-                     cl_command_queue ignoreQueue, int num_elements)
-    if (!is_extension_available(deviceID, "cl_khr_device_uuid"))
-    {
-        log_info("cl_khr_device_uuid not supported. Skipping test...\n");
-        return 0;
-    }
-    int total_errors = 0;
-    uuid device_uuid;
-    bool success = get_uuid(deviceID, CL_DEVICE_UUID_KHR, device_uuid);
-    if (!success)
-    {
-        log_error("Error getting device UUID\n");
-        ++total_errors;
-    }
-    else
-    {
-        log_info("\tDevice UUID: ");
-        log_info_uuid(device_uuid);
-        log_info("\n");
-    }
-    uuid driver_uuid;
-    success = get_uuid(deviceID, CL_DRIVER_UUID_KHR, driver_uuid);
-    if (!success)
-    {
-        log_error("Error getting driver UUID\n");
-        ++total_errors;
-    }
-    else
-    {
-        log_info("\tDriver UUID: ");
-        log_info_uuid(driver_uuid);
-        log_info("\n");
-    }
-    size_t size_ret{};
-    cl_bool device_luid_valid{};
-    cl_int err = clGetDeviceInfo(deviceID, CL_DEVICE_LUID_VALID_KHR,
-                                 sizeof(device_luid_valid), &device_luid_valid,
-                                 &size_ret);
-    if (!check_device_info_returns(err, size_ret, sizeof(device_luid_valid)))
-    {
-        log_error("Error getting device LUID validity\n");
-        ++total_errors;
-        device_luid_valid = false;
-    }
-    else
-    {
-        log_info("\tDevice LUID validity is %s\n",
-                 device_luid_valid ? "true" : "false");
-    }
-    luid device_luid;
-    success =
-        get_uuid(deviceID, CL_DEVICE_LUID_KHR, device_luid, device_luid_valid);
-    if (!success)
-    {
-        log_error("Error getting device LUID\n");
-        ++total_errors;
-    }
-    else
-    {
-        log_info("\tDevice LUID: ");
-        log_info_uuid(device_luid);
-        log_info("\n");
-    }
-    cl_uint device_node_mask{};
-    err =
-        clGetDeviceInfo(deviceID, CL_DEVICE_NODE_MASK_KHR,
-                        sizeof(device_node_mask), &device_node_mask, &size_ret);
-    if (!check_device_info_returns(err, size_ret, sizeof(device_node_mask)))
-    {
-        log_error("Error getting device node mask\n");
-        ++total_errors;
-    }
-    else
-    {
-        log_info("\tNode mask  : %08lx\n",
-                 static_cast<unsigned long>(device_node_mask));
-        /* If the LUID is valid, there must be one and only one bit set in the
-         * node mask */
-        if (device_luid_valid)
-        {
-            static constexpr size_t cl_uint_size_in_bits = 32;
-            const size_t bit_count =
-                std::bitset<cl_uint_size_in_bits>(device_node_mask).count();
-            if (1 != bit_count)
-            {
-                log_error("Wrong amount of bits set in node mask (%zu != 1) "
-                          "with valid LUID\n",
-                          bit_count);
-                ++total_errors;
-            }
-        }
-    }
-    return total_errors;
diff --git a/test_conformance/computeinfo/extended_versioning.cpp b/test_conformance/computeinfo/extended_versioning.cpp
index 179e902..f9c29b9 100644
--- a/test_conformance/computeinfo/extended_versioning.cpp
+++ b/test_conformance/computeinfo/extended_versioning.cpp
@@ -17,7 +17,6 @@
 #include <vector>
 #include <set>
-#include <iterator>
 #include <algorithm>
 #include <cstring>
 #include "harness/testHarness.h"
@@ -243,8 +242,7 @@
 /* Check that CL_DEVICE{,_OPENCL_C}_NUMERIC_VERSION_KHR return the same versions
-static int test_extended_versioning_device_versions(bool ext,
-                                                    cl_device_id deviceID)
+static int test_extended_versioning_device_versions(cl_device_id deviceID)
     log_info("Device versions:\n");
@@ -261,15 +259,6 @@
     for (const auto& query : device_version_queries)
-        // CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR is only supported by
-        // cl_khr_extended_versioning:
-        if (!ext
-            && query.param_name_numeric
-        {
-            continue;
-        }
         const std::vector<char> version_string(
             get_device_string(deviceID, query.param_name_string));
         if (version_string.empty())
@@ -698,41 +687,14 @@
     return 0;
-// Assumes the core enums, structures, and macros exactly match
-// the extension enums, structures, and macros:
-              "CL_PLATFORM_NUMERIC_VERSION mismatch");
-              "CL_PLATFORM_EXTENSIONS_WITH_VERSION mismatch");
-              "CL_DEVICE_NUMERIC_VERSION mismatch");
-              "CL_DEVICE_EXTENSIONS_WITH_VERSION mismatch");
-              "CL_DEVICE_ILS_WITH_VERSION mismatch");
-              "CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION mismatch");
-static_assert(sizeof(cl_name_version) == sizeof(cl_name_version_khr),
-              "cl_name_version mismatch");
-static_assert(CL_MAKE_VERSION(1, 2, 3) == CL_MAKE_VERSION_KHR(1, 2, 3),
-              "CL_MAKE_VERSION mismatch");
 int test_extended_versioning(cl_device_id deviceID, cl_context context,
                              cl_command_queue ignoreQueue, int num_elements)
-    bool ext = is_extension_available(deviceID, "cl_khr_extended_versioning");
-    bool core = get_device_cl_version(deviceID) >= Version(3, 0);
-    if (!ext && !core)
+    if (!is_extension_available(deviceID, "cl_khr_extended_versioning"))
-        return TEST_SKIPPED_ITSELF;
+        log_info(
+            "cl_khr_extended_versioning not supported. Skipping test...\n");
+        return 0;
     cl_platform_id platform;
@@ -743,7 +705,7 @@
     int total_errors = 0;
     total_errors += test_extended_versioning_platform_version(platform);
     total_errors += test_extended_versioning_platform_extensions(platform);
-    total_errors += test_extended_versioning_device_versions(ext, deviceID);
+    total_errors += test_extended_versioning_device_versions(deviceID);
     total_errors += test_extended_versioning_device_extensions(deviceID);
     total_errors += test_extended_versioning_device_il(deviceID);
     total_errors += test_extended_versioning_device_built_in_kernels(deviceID);
diff --git a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp
index 4860b44..0bc04a9 100644
--- a/test_conformance/computeinfo/main.cpp
+++ b/test_conformance/computeinfo/main.cpp
@@ -59,10 +59,6 @@
-    type_cl_device_atomic_capabilities,
-    type_cl_device_device_enqueue_capabilities,
-    type_cl_name_version_array,
-    type_cl_name_version,
 typedef union {
@@ -80,10 +76,6 @@
     cl_ulong ull;
     char* string;
     cl_device_svm_capabilities svmCapabilities;
-    cl_device_atomic_capabilities atomicCapabilities;
-    cl_device_device_enqueue_capabilities deviceEnqueueCapabilities;
-    cl_name_version* cl_name_version_array;
-    cl_name_version cl_name_version_single;
 } config_data;
 struct _version
@@ -125,7 +117,6 @@
     const char* opcode_name;
     int config_type;
     config_data config;
-    size_t opcode_ret_size;
 } config_info;
 #define CONFIG_INFO(major, minor, opcode, type)                                \
@@ -227,7 +218,9 @@
     CONFIG_INFO(1, 1, CL_DEVICE_VENDOR, string),
     CONFIG_INFO(1, 1, CL_DRIVER_VERSION, string),
     CONFIG_INFO(1, 1, CL_DEVICE_PROFILE, string),
+    CONFIG_INFO(1, 1, CL_DEVICE_VERSION, string),
@@ -255,25 +248,6 @@
-                cl_device_atomic_capabilities),
-                cl_device_atomic_capabilities),
-                cl_uint),
-    CONFIG_INFO(3, 0, CL_DEVICE_OPENCL_C_FEATURES, cl_name_version_array),
-                cl_device_device_enqueue_capabilities),
-    CONFIG_INFO(3, 0, CL_DEVICE_NUMERIC_VERSION, cl_name_version),
-    CONFIG_INFO(3, 0, CL_DEVICE_EXTENSIONS_WITH_VERSION, cl_name_version_array),
-    CONFIG_INFO(3, 0, CL_DEVICE_OPENCL_C_ALL_VERSIONS, cl_name_version_array),
-    CONFIG_INFO(3, 0, CL_DEVICE_ILS_WITH_VERSION, cl_name_version_array),
-                cl_name_version_array),
 #define ENTRY(major, minor, T)                                                 \
@@ -396,62 +370,6 @@
     return num_errors;
-int getPlatformConfigInfo(cl_platform_id platform, config_info* info)
-    int err = CL_SUCCESS;
-    int size_err = 0;
-    size_t config_size_set;
-    size_t config_size_ret;
-    switch (info->config_type)
-    {
-        case type_string:
-            err = clGetPlatformInfo(platform, info->opcode, 0, NULL,
-                                    &config_size_set);
-            info->config.string = NULL;
-            if (err == CL_SUCCESS && config_size_set > 0)
-            {
-                info->config.string = (char*)malloc(config_size_set);
-                err = clGetPlatformInfo(platform, info->opcode, config_size_set,
-                                        info->config.string, &config_size_ret);
-                size_err = config_size_set != config_size_ret;
-            }
-            break;
-        case type_cl_name_version_array:
-            err = clGetPlatformInfo(platform, info->opcode, 0, NULL,
-                                    &config_size_set);
-            info->config.cl_name_version_array = NULL;
-            if (err == CL_SUCCESS && config_size_set > 0)
-            {
-                info->config.cl_name_version_array = (cl_name_version*)malloc(
-                    config_size_set * sizeof(cl_name_version));
-                err = clGetPlatformInfo(platform, info->opcode, config_size_set,
-                                        info->config.cl_name_version_array,
-                                        &config_size_ret);
-                size_err = config_size_set != config_size_ret;
-                info->opcode_ret_size = config_size_ret;
-            }
-            break;
-        case type_cl_name_version:
-            err = clGetPlatformInfo(platform, info->opcode, 0, NULL,
-                                    &config_size_set);
-            if (err == CL_SUCCESS && config_size_set > 0)
-            {
-                err = clGetPlatformInfo(platform, info->opcode, config_size_set,
-                                        &info->config.cl_name_version_single,
-                                        &config_size_ret);
-            }
-            size_err = config_size_set != config_size_ret;
-            break;
-        default:
-            log_error("Unknown config type: %d\n", info->config_type);
-            break;
-    }
-    if (err || size_err)
-        log_error("\tFailed clGetPlatformInfo for %s.\n", info->opcode_name);
-    if (err) print_error(err, "\t\tclGetPlatformInfo failed.");
-    if (size_err) log_error("\t\tWrong size return from clGetPlatformInfo.\n");
-    return err || size_err;
 int getConfigInfo(cl_device_id device, config_info* info)
@@ -551,43 +469,6 @@
                 device, info->opcode, sizeof(info->config.svmCapabilities),
                 &info->config.svmCapabilities, &config_size_ret);
-        case type_cl_device_device_enqueue_capabilities:
-            err = clGetDeviceInfo(
-                device, info->opcode,
-                sizeof(info->config.deviceEnqueueCapabilities),
-                &info->config.deviceEnqueueCapabilities, &config_size_ret);
-            break;
-        case type_cl_device_atomic_capabilities:
-            err = clGetDeviceInfo(
-                device, info->opcode, sizeof(info->config.atomicCapabilities),
-                &info->config.atomicCapabilities, &config_size_ret);
-            break;
-        case type_cl_name_version_array:
-            err = clGetDeviceInfo(device, info->opcode, 0, NULL,
-                                  &config_size_set);
-            info->config.cl_name_version_array = NULL;
-            if (err == CL_SUCCESS && config_size_set > 0)
-            {
-                info->config.cl_name_version_array = (cl_name_version*)malloc(
-                    config_size_set * sizeof(cl_name_version));
-                err = clGetDeviceInfo(device, info->opcode, config_size_set,
-                                      info->config.cl_name_version_array,
-                                      &config_size_ret);
-                size_err = config_size_set != config_size_ret;
-                info->opcode_ret_size = config_size_ret;
-            }
-            break;
-        case type_cl_name_version:
-            err = clGetDeviceInfo(device, info->opcode, 0, NULL,
-                                  &config_size_set);
-            if (err == CL_SUCCESS && config_size_set > 0)
-            {
-                err = clGetDeviceInfo(device, info->opcode, config_size_set,
-                                      &info->config.cl_name_version_single,
-                                      &config_size_ret);
-            }
-            size_err = config_size_set != config_size_ret;
-            break;
             log_error("Unknown config type: %d\n", info->config_type);
@@ -599,7 +480,7 @@
     return err || size_err;
-void dumpConfigInfo(config_info* info)
+void dumpConfigInfo(cl_device_id device, config_info* info)
     // We should not error if we find an unknown configuration since vendors
     // may specify their own options beyond the list in the specification.
@@ -826,111 +707,6 @@
                         (info->config.svmCapabilities & ~all_svm_capabilities));
-        case type_cl_device_device_enqueue_capabilities:
-            log_info("\t%s == %s|%s\n", info->opcode_name,
-                     (info->config.deviceEnqueueCapabilities
-                      & CL_DEVICE_QUEUE_SUPPORTED)
-                         ? "CL_DEVICE_QUEUE_SUPPORTED"
-                         : "",
-                     (info->config.deviceEnqueueCapabilities
-                         ? "CL_DEVICE_QUEUE_REPLACEABLE_DEFAULT"
-                         : "");
-            {
-                cl_device_device_enqueue_capabilities
-                    all_device_enqueue_capabilities = CL_DEVICE_QUEUE_SUPPORTED
-                if (info->config.deviceEnqueueCapabilities
-                    & ~all_device_enqueue_capabilities)
-                    log_info("WARNING: %s unknown bits found 0x%08" PRIX64,
-                             info->opcode_name,
-                             (info->config.deviceEnqueueCapabilities
-                              & ~all_device_enqueue_capabilities));
-            }
-            break;
-        case type_cl_device_atomic_capabilities:
-            log_info("\t%s == %s|%s|%s|%s|%s|%s|%s\n", info->opcode_name,
-                     (info->config.atomicCapabilities
-                      & CL_DEVICE_ATOMIC_ORDER_RELAXED)
-                         ? "CL_DEVICE_ATOMIC_ORDER_RELAXED"
-                         : "",
-                     (info->config.atomicCapabilities
-                      & CL_DEVICE_ATOMIC_ORDER_ACQ_REL)
-                         ? "CL_DEVICE_ATOMIC_ORDER_ACQ_REL"
-                         : "",
-                     (info->config.atomicCapabilities
-                      & CL_DEVICE_ATOMIC_ORDER_SEQ_CST)
-                         ? "CL_DEVICE_ATOMIC_ORDER_SEQ_CST"
-                         : "",
-                     (info->config.atomicCapabilities
-                      & CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM)
-                         ? "CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM"
-                         : "",
-                     (info->config.atomicCapabilities
-                      & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP)
-                         ? "CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP"
-                         : "",
-                     (info->config.atomicCapabilities
-                      & CL_DEVICE_ATOMIC_SCOPE_DEVICE)
-                         ? "CL_DEVICE_ATOMIC_SCOPE_DEVICE"
-                         : "",
-                     (info->config.atomicCapabilities
-                      & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES)
-                         ? "CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES"
-                         : "");
-            {
-                cl_device_atomic_capabilities all_atomic_capabilities =
-                    | CL_DEVICE_ATOMIC_ORDER_ACQ_REL
-                    | CL_DEVICE_ATOMIC_ORDER_SEQ_CST
-                    | CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM
-                    | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP
-                    | CL_DEVICE_ATOMIC_SCOPE_DEVICE
-                    | CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES;
-                if (info->config.atomicCapabilities & ~all_atomic_capabilities)
-                    log_info("WARNING: %s unknown bits found 0x%08" PRIX64,
-                             info->opcode_name,
-                             (info->config.atomicCapabilities
-                              & ~all_atomic_capabilities));
-            }
-            break;
-        case type_cl_name_version_array: {
-            int number_of_version_items = info->opcode_ret_size
-                / sizeof(*info->config.cl_name_version_array);
-            log_info("\t%s supported name and version:\n", info->opcode_name);
-            if (number_of_version_items == 0)
-            {
-                log_info("\t\t\"\"\n");
-            }
-            else
-            {
-                for (int f = 0; f < number_of_version_items; f++)
-                {
-                    cl_name_version new_version_item =
-                        info->config.cl_name_version_array[f];
-                    cl_version new_version_major =
-                        CL_VERSION_MAJOR_KHR(new_version_item.version);
-                    cl_version new_version_minor =
-                        CL_VERSION_MINOR_KHR(new_version_item.version);
-                    cl_version new_version_patch =
-                        CL_VERSION_PATCH_KHR(new_version_item.version);
-                    log_info("\t\t\"%s\" %d.%d.%d\n",,
-                             CL_VERSION_MAJOR_KHR(new_version_item.version),
-                             CL_VERSION_MINOR_KHR(new_version_item.version),
-                             CL_VERSION_PATCH_KHR(new_version_item.version));
-                }
-            }
-            break;
-        }
-        case type_cl_name_version:
-            log_info("\t%s == %d.%d.%d\n", info->opcode_name,
-                     CL_VERSION_MAJOR_KHR(
-                         info->config.cl_name_version_single.version),
-                     CL_VERSION_MINOR_KHR(
-                         info->config.cl_name_version_single.version),
-                     CL_VERSION_PATCH_KHR(
-                         info->config.cl_name_version_single.version));
-            break;
@@ -1125,7 +901,7 @@
             err = getConfigInfo(device, &info);
             if (!err)
-                dumpConfigInfo(&info);
+                dumpConfigInfo(device, &info);
                 if (info.opcode == CL_DEVICE_VERSION)
                     err = parseVersion(info.config.string, &version);
@@ -1150,10 +926,6 @@
-                if (info.config_type == type_cl_name_version_array)
-                {
-                    free(info.config.cl_name_version_array);
-                }
@@ -1179,7 +951,7 @@
                 err = getConfigInfo(device, &info);
                 if (!err)
-                    dumpConfigInfo(&info);
+                    dumpConfigInfo(device, &info);
@@ -1194,67 +966,6 @@
     return total_errors;
-config_info config_platform_infos[] = {
-    // CL_PLATFORM_VERSION has to be first defined with version 0 0.
-    CONFIG_INFO(1, 1, CL_PLATFORM_NAME, string),
-                cl_name_version_array),
-    CONFIG_INFO(3, 0, CL_PLATFORM_NUMERIC_VERSION, cl_name_version)
-int getPlatformCapabilities(cl_platform_id platform)
-    int total_errors = 0;
-    version_t version = { 0, 0 }; // Version of the device. Will get real value
-                                  // on the first loop iteration.
-    int err;
-    for (unsigned onConfigInfo = 0; onConfigInfo
-         < sizeof(config_platform_infos) / sizeof(config_platform_infos[0]);
-         onConfigInfo++)
-    {
-        config_info info = config_platform_infos[onConfigInfo];
-        if (vercmp(version, info.version) >= 0)
-        {
-            err = getPlatformConfigInfo(platform, &info);
-            if (!err)
-            {
-                dumpConfigInfo(&info);
-                if (info.opcode == CL_PLATFORM_VERSION)
-                {
-                    err = parseVersion(info.config.string, &version);
-                    if (err)
-                    {
-                        total_errors++;
-                        free(info.config.string);
-                        break;
-                    }
-                }
-                if (info.config_type == type_string)
-                {
-                    free(info.config.string);
-                }
-                if (info.config_type == type_cl_name_version_array)
-                {
-                    free(info.config.cl_name_version_array);
-                }
-            }
-            else
-            {
-                total_errors++;
-            }
-        }
-        else
-        {
-            log_info("\tSkipped: %s.\n", info.opcode_name);
-        }
-    }
-    return total_errors;
 int test_computeinfo(cl_device_id deviceID, cl_context context,
                      cl_command_queue ignoreQueue, int num_elements)
@@ -1265,11 +976,23 @@
     err = clGetPlatformIDs(1, &platform, NULL);
     test_error(err, "clGetPlatformIDs failed");
+    if (err != CL_SUCCESS)
+    {
+        total_errors++;
+    }
     // print platform info
-    err = getPlatformCapabilities(platform);
-    test_error(err, "getPlatformCapabilities failed");
+    print_platform_string_selector(platform, "CL_PLATFORM_PROFILE",
+                                   CL_PLATFORM_PROFILE);
+    print_platform_string_selector(platform, "CL_PLATFORM_VERSION",
+                                   CL_PLATFORM_VERSION);
+    print_platform_string_selector(platform, "CL_PLATFORM_NAME",
+                                   CL_PLATFORM_NAME);
+    print_platform_string_selector(platform, "CL_PLATFORM_VENDOR",
+                                   CL_PLATFORM_VENDOR);
+    print_platform_string_selector(platform, "CL_PLATFORM_EXTENSIONS",
+                                   CL_PLATFORM_EXTENSIONS);
     // Check to see if this test is being run on a specific device
@@ -1420,16 +1143,10 @@
 extern int test_extended_versioning(cl_device_id, cl_context, cl_command_queue,
-extern int test_device_uuid(cl_device_id, cl_context, cl_command_queue, int);
-extern int test_conformance_version(cl_device_id, cl_context, cl_command_queue,
-                                    int);
 test_definition test_list[] = {
-    ADD_TEST(device_uuid),
-    ADD_TEST_VERSION(conformance_version, Version(3, 0)),
 const int test_num = ARRAY_SIZE(test_list);
@@ -1459,5 +1176,6 @@
-    return runTestHarness(argCount, argList, test_num, test_list, true, 0);
+    return runTestHarness(argCount, argList, test_num, test_list, false, true,
+                          0);
diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp
index dddebb4..6d80dee 100644
--- a/test_conformance/contractions/contractions.cpp
+++ b/test_conformance/contractions/contractions.cpp
@@ -576,39 +576,56 @@
         "\n" };
-    for (i = 0; i < sizeof(sizeNames) / sizeof(sizeNames[0]); i++)
+    for( i = 0; i < sizeof( sizeNames ) / sizeof( sizeNames[0] ); i++ )
-        size_t strCount = sizeof(kernels) / sizeof(kernels[0]);
+        size_t strCount = sizeof( kernels ) / sizeof( kernels[0] );
         kernels[0] = "";
-        for (j = 2; j < strCount; j += 2) kernels[j] = sizeNames[i];
-        error = create_single_kernel_helper(gContext, &gProgram[i], nullptr,
-                                            strCount, kernels, nullptr);
-        if (CL_SUCCESS != error || nullptr == gProgram[i])
+        for( j = 2; j < strCount; j += 2 )
+            kernels[j] = sizeNames[i];
+        gProgram[i] = clCreateProgramWithSource(gContext, strCount, kernels, NULL, &error);
+        if( NULL == gProgram[i] )
-            log_error("Error: Unable to create test program! (%s) (in %s:%d)\n",
-                      IGetErrorString(error), __FILE__, __LINE__);
+            vlog_error( "clCreateProgramWithSource failed\n" );
+            return TEST_FAIL;
+        }
+        if(( error = clBuildProgram(gProgram[i], 1, &device, NULL, NULL, NULL) ))
+        {
+            vlog_error( "clBuildProgramExecutable failed\n" );
+            char build_log[2048] = "";
+            clGetProgramBuildInfo(gProgram[i], device, CL_PROGRAM_BUILD_LOG, sizeof(build_log), build_log, NULL);
+            vlog_error( "Log:\n%s\n", build_log );
             return TEST_FAIL;
-    if (gHasDouble)
+    if( gHasDouble )
         kernels[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
-        for (i = 0; i < sizeof(sizeNames_double) / sizeof(sizeNames_double[0]);
-             i++)
+        for( i = 0; i < sizeof( sizeNames_double ) / sizeof( sizeNames_double[0] ); i++ )
-            size_t strCount = sizeof(kernels) / sizeof(kernels[0]);
+            size_t strCount = sizeof( kernels ) / sizeof( kernels[0] );
-            for (j = 2; j < strCount; j += 2) kernels[j] = sizeNames_double[i];
-            error = create_single_kernel_helper(gContext, &gProgram_double[i],
-                                                nullptr, strCount, kernels,
-                                                nullptr);
-            if (CL_SUCCESS != error || nullptr == gProgram_double[i])
+            for( j = 2; j < strCount; j += 2 )
+                kernels[j] = sizeNames_double[i];
+            gProgram_double[i] = clCreateProgramWithSource(gContext, strCount, kernels, NULL, &error);
+            if( NULL == gProgram_double[i] )
-                log_error(
-                    "Error: Unable to create test program! (%s) (in %s:%d)\n",
-                    IGetErrorString(error), __FILE__, __LINE__);
+                vlog_error( "clCreateProgramWithSource failed\n" );
+                return TEST_FAIL;
+            }
+            if(( error = clBuildProgram(gProgram_double[i], 1, &device, NULL, NULL, NULL) ))
+            {
+                vlog_error( "clBuildProgramExecutable failed\n" );
+                char build_log[2048] = "";
+                clGetProgramBuildInfo(gProgram_double[i], device, CL_PROGRAM_BUILD_LOG, sizeof(build_log), build_log, NULL);
+                vlog_error( "Log:\n%s\n", build_log );
                 return TEST_FAIL;
diff --git a/test_conformance/conversions/CMakeLists.txt b/test_conformance/conversions/CMakeLists.txt
index 523b6ea..2dd0d83 100644
--- a/test_conformance/conversions/CMakeLists.txt
+++ b/test_conformance/conversions/CMakeLists.txt
@@ -4,7 +4,7 @@
       Sleep.cpp test_conversions.cpp basic_test_conversions.cpp
     list(APPEND ${MODULE_NAME}_SOURCES fplib.cpp)
diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp
index 3299884..d32694a 100644
--- a/test_conformance/conversions/basic_test_conversions.cpp
+++ b/test_conformance/conversions/basic_test_conversions.cpp
@@ -21,11 +21,11 @@
 #include "harness/mt19937.h"
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+#if defined( __arm__ ) && defined( __GNUC__ )
 #include "fplib.h"
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+#if defined( __arm__ ) && defined( __GNUC__ )
 /* Rounding modes and saturation for use with qcom 64 bit to float conversion library */
     bool            qcom_sat;
     roundingMode    qcom_rm;
@@ -678,8 +678,7 @@
 static void uint2int( void *out, void *in){ ((cl_int*) out)[0] = ((cl_uint*) in)[0]; }
 static void uint2float( void *out, void *in)
-    // Use volatile to prevent optimization by Clang compiler
-    volatile cl_uint l = ((cl_uint *)in)[0];
+    cl_uint l = ((cl_uint*) in)[0];
     ((float*) out)[0] = (l == 0 ? 0.0f : (float) l);        // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
 static void uint2double( void *out, void *in)
@@ -760,18 +759,12 @@
     ((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result));
     cl_ulong l = ((cl_ulong*) in)[0];
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
-    /* ARM VFP doesn't have hardware instruction for converting from 64-bit
-     * integer to float types, hence GCC ARM uses the floating-point emulation
-     * code despite which -mfloat-abi setting it is. But the emulation code in
-     * libgcc.a has only one rounding mode (round to nearest even in this case)
+#if defined( __arm__ ) && defined( __GNUC__ )
+    /* ARM VFP doesn't have hardware instruction for converting from 64-bit integer to float types, hence GCC ARM uses the floating-point emulation code
+     * despite which -mfloat-abi setting it is. But the emulation code in libgcc.a has only one rounding mode (round to nearest even in this case)
      * and ignores the user rounding mode setting in hardware.
-     * As a result setting rounding modes in hardware won't give correct
-     * rounding results for type covert from 64-bit integer to float using GCC
-     * for ARM compiler so for testing different rounding modes, we need to use
-     * alternative reference function. ARM64 does have an instruction, however
-     * we cannot guarantee the compiler will use it.  On all ARM architechures
-     * use emulation to calculate reference.*/
+     * As a result setting rounding modes in hardware won't give correct rounding results for type covert from 64-bit integer to float using GCC for ARM compiler
+     * so for testing different rounding modes, we need to use alternative reference function */
     ((float*) out)[0] = qcom_u64_2_f32(l, qcom_sat, qcom_rm);
     ((float*) out)[0] = (l == 0 ? 0.0f : (float) l);        // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
@@ -792,8 +785,7 @@
     ((double*) out)[0] = (l == 0 ? 0.0 : (((cl_long)l < 0) ? result * 2.0 : result));
-    // Use volatile to prevent optimization by Clang compiler
-    volatile cl_ulong l = ((cl_ulong *)in)[0];
+    cl_ulong l = ((cl_ulong*) in)[0];
     ((double*) out)[0] = (l == 0 ? 0.0 : (double) l);      // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
@@ -814,18 +806,12 @@
     ((float*) out)[0] = (l == 0 ? 0.0f : result);        // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
     cl_long l = ((cl_long*) in)[0];
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
-    /* ARM VFP doesn't have hardware instruction for converting from 64-bit
-     * integer to float types, hence GCC ARM uses the floating-point emulation
-     * code despite which -mfloat-abi setting it is. But the emulation code in
-     * libgcc.a has only one rounding mode (round to nearest even in this case)
+#if defined( __arm__ ) && defined( __GNUC__ )
+    /* ARM VFP doesn't have hardware instruction for converting from 64-bit integer to float types, hence GCC ARM uses the floating-point emulation code
+     * despite which -mfloat-abi setting it is. But the emulation code in libgcc.a has only one rounding mode (round to nearest even in this case)
      * and ignores the user rounding mode setting in hardware.
-     * As a result setting rounding modes in hardware won't give correct
-     * rounding results for type covert from 64-bit integer to float using GCC
-     * for ARM compiler so for testing different rounding modes, we need to use
-     * alternative reference function. ARM64 does have an instruction, however
-     * we cannot guarantee the compiler will use it.  On all ARM architechures
-     * use emulation to calculate reference.*/
+     * As a result setting rounding modes in hardware won't give correct rounding results for type covert from 64-bit integer to float using GCC for ARM compiler
+     * so for testing different rounding modes, we need to use alternative reference function */
     ((float*) out)[0] = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm));
     ((float*) out)[0] = (l == 0 ? 0.0f : (float) l);        // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
diff --git a/test_conformance/conversions/fplib.cpp b/test_conformance/conversions/fplib.cpp
index e739b9a..a18b919 100644
--- a/test_conformance/conversions/fplib.cpp
+++ b/test_conformance/conversions/fplib.cpp
@@ -17,9 +17,7 @@
 #include <math.h>
 #include "fplib.h"
-#if !defined(FLT_MANT_DIG)
 #define FLT_MANT_DIG    24
 #define as_float(x)     (*((float *)(&x)))
 #define as_long(x)      (*((int64_t *)(&x)))
@@ -29,8 +27,8 @@
     for( num_zeros = 0; num_zeros < (sizeof(uint64_t)*8); num_zeros++)
-        volatile uint64_t v = 0x8000000000000000ull & (value << num_zeros);
-        if (v) break;
+        if(0x8000000000000000 & (value << num_zeros))
+            break;
     return num_zeros;
@@ -147,9 +145,6 @@
                     return as_float(result);
-        case qcomRoundingModeCount: {
-            break; // Avoid build error for unhandled enum value
-        }
     return 0.0f;
@@ -221,9 +216,6 @@
             uint32_t result = exponent | mantissa;
             return as_float(result); // for positive inputs return RTZ result
-        case qcomRoundingModeCount: {
-            break; // Avoid build error for unhandled enum value
-        }
     return 0.0f;
diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp
index 87b8ead..d73df06 100644
--- a/test_conformance/conversions/test_conversions.cpp
+++ b/test_conformance/conversions/test_conversions.cpp
@@ -19,7 +19,7 @@
 #include "harness/testHarness.h"
 #include "harness/kernelHelpers.h"
 #include "harness/parseParameters.h"
-#if defined(__APPLE__)
+#if !defined(_WIN32) && !defined(__ANDROID__)
 #include <sys/sysctl.h>
@@ -50,6 +50,8 @@
 #include "Sleep.h"
 #include "basic_test_conversions.h"
 #if (defined(_WIN32) && defined (_MSC_VER))
 // need for _controlfp_s and rouinding modes in RoundingMode
 #include "harness/testHarness.h"
@@ -65,7 +67,7 @@
 #define      kCallStyleCount (kVectorSizeCount + 1 /* for implicit scalar */)
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+#if defined( __arm__ ) && defined( __GNUC__ )
 #include "fplib.h"
     extern bool            qcom_sat;
     extern roundingMode    qcom_rm;
@@ -317,11 +319,10 @@
     int ret = runTestHarnessWithCheck( 1, arg, test_num, test_list, true, 0, InitCL );
     free_mtdata( gMTdata );
-    if (gQueue)
-    {
-        error = clFinish(gQueue);
-        if (error) vlog_error("clFinish failed: %d\n", error);
-    }
+    error = clFinish(gQueue);
+    if (error)
+        vlog_error("clFinish failed: %d\n", error);
@@ -884,18 +885,12 @@
         if( info->sat )
             f = gSaturatedConversions[ outType ][ inType ];
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
-        /* ARM VFP doesn't have hardware instruction for converting from 64-bit
-         * integer to float types, hence GCC ARM uses the floating-point
-         * emulation code despite which -mfloat-abi setting it is. But the
-         * emulation code in libgcc.a has only one rounding mode (round to
-         * nearest even in this case) and ignores the user rounding mode setting
-         * in hardware. As a result setting rounding modes in hardware won't
-         * give correct rounding results for type covert from 64-bit integer to
-         * float using GCC for ARM compiler so for testing different rounding
-         * modes, we need to use alternative reference function. ARM64 does have
-         * an instruction, however we cannot guarantee the compiler will use it.
-         * On all ARM architechures use emulation to calculate reference.*/
+#if defined( __arm__ ) && defined( __GNUC__ )
+       /* ARM VFP doesn't have hardware instruction for converting from 64-bit integer to float types, hence GCC ARM uses the floating-point emulation code
+        * despite which -mfloat-abi setting it is. But the emulation code in libgcc.a has only one rounding mode (round to nearest even in this case)
+        * and ignores the user rounding mode setting in hardware.
+        * As a result setting rounding modes in hardware won't give correct rounding results for type covert from 64-bit integer to float using GCC for ARM compiler
+        * so for testing different rounding modes, we need to use alternative reference function */
         switch (round)
             /* conversions to floating-point type use the current rounding mode.
diff --git a/test_conformance/d3d10/harness.cpp b/test_conformance/d3d10/harness.cpp
index 93f2281..ffdfea5 100644
--- a/test_conformance/d3d10/harness.cpp
+++ b/test_conformance/d3d10/harness.cpp
@@ -367,12 +367,41 @@
         const char *sourceTexts[] = {source};
         size_t sourceLengths[] = {strlen(source) };
-        status = create_single_kernel_helper(context, &program, &kernel, 1,
-                                             &sourceTexts[0], entrypoint);
+        status = create_single_kernel_helper_create_program(context, &program, 1, &sourceTexts[0]);
             CL_SUCCESS == status,
             "clCreateProgramWithSource failed");
+    status = clBuildProgram(
+        program,
+        0,
+        NULL,
+        NULL,
+        NULL,
+        NULL);
+    if (CL_SUCCESS != status)
+    {
+        char log[2048] = {0};
+        status = clGetProgramBuildInfo(
+            program,
+            device,
+            CL_PROGRAM_BUILD_LOG,
+            sizeof(log),
+            log,
+            NULL);
+        TestPrint("error: %s\n", log);
+        TestRequire(
+            CL_SUCCESS == status,
+            "Compilation error log:\n%s\n", log);
+    }
+    kernel = clCreateKernel(
+        program,
+        entrypoint,
+        &status);
+    TestRequire(
+        CL_SUCCESS == status,
+        "clCreateKernel failed");
     *outKernel = kernel;
diff --git a/test_conformance/d3d11/harness.cpp b/test_conformance/d3d11/harness.cpp
index 90ba200..687c6da 100644
--- a/test_conformance/d3d11/harness.cpp
+++ b/test_conformance/d3d11/harness.cpp
@@ -400,10 +400,41 @@
         const char *sourceTexts[] = {source};
         size_t sourceLengths[] = {strlen(source) };
-        status = create_single_kernel_helper(context, &program, &kernel, 1,
-                                             &sourceTexts[0], entrypoint);
-        TestRequire(CL_SUCCESS == status, "Kernel creation failed");
+        status = create_single_kernel_helper_create_program(context, &program, 1, &sourceTexts[0]);
+        TestRequire(
+            CL_SUCCESS == status,
+            "clCreateProgramWithSource failed");
+    status = clBuildProgram(
+        program,
+        0,
+        NULL,
+        NULL,
+        NULL,
+        NULL);
+    if (CL_SUCCESS != status)
+    {
+        char log[2048] = {0};
+        status = clGetProgramBuildInfo(
+            program,
+            device,
+            CL_PROGRAM_BUILD_LOG,
+            sizeof(log),
+            log,
+            NULL);
+        TestPrint("error: %s\n", log);
+        TestRequire(
+            CL_SUCCESS == status,
+            "Compilation error log:\n%s\n", log);
+    }
+    kernel = clCreateKernel(
+        program,
+        entrypoint,
+        &status);
+    TestRequire(
+        CL_SUCCESS == status,
+        "clCreateKernel failed");
     *outKernel = kernel;
diff --git a/test_conformance/device_execution/CMakeLists.txt b/test_conformance/device_execution/CMakeLists.txt
index 5e9e30e..d99ba21 100644
--- a/test_conformance/device_execution/CMakeLists.txt
+++ b/test_conformance/device_execution/CMakeLists.txt
@@ -8,7 +8,6 @@
-    enqueue_profiling.cpp
diff --git a/test_conformance/device_execution/enqueue_ndrange.cpp b/test_conformance/device_execution/enqueue_ndrange.cpp
index 8ced662..84ac339 100644
--- a/test_conformance/device_execution/enqueue_ndrange.cpp
+++ b/test_conformance/device_execution/enqueue_ndrange.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -27,316 +27,271 @@
 #ifdef CL_VERSION_2_0
 extern int gWimpyMode;
-static const char *helper_ndrange_1d_glo[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
-    "memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_1d_glo(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
-    "atomic_uint* val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int i = 0; i < n; i++)" NL,
-    "  {" NL,
-    "    ndrange_t ndrange = ndrange_1D(glob_size_arr[i]);" NL,
-    "    int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "    if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_1d_glo[] =
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_1d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int i = 0; i < n; i++)"
+    NL, "  {"
+    NL, "    ndrange_t ndrange = ndrange_1D(glob_size_arr[i]);"
+    NL, "    int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "    if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "  }"
+    NL, "}"
+    NL
-static const char *helper_ndrange_1d_loc[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
-    "memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_1d_loc(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
-    "atomic_uint* val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int k = 0; k < n; k++)" NL,
-    "  {" NL,
-    "    for(int i = 0; i < n; i++)" NL,
-    "    {" NL,
-    "      if (glob_size_arr[i] >= loc_size_arr[k])" NL,
-    "      {" NL,
-    "        ndrange_t ndrange = ndrange_1D(glob_size_arr[i], "
-    "loc_size_arr[k]);" NL,
-    "        int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "        if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "      }" NL,
-    "    }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_1d_loc[] =
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_1d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int k = 0; k < n; k++)"
+    NL, "  {"
+    NL, "    for(int i = 0; i < n; i++)"
+    NL, "    {"
+    NL, "      if (glob_size_arr[i] >= loc_size_arr[k])"
+    NL, "      {"
+    NL, "        ndrange_t ndrange = ndrange_1D(glob_size_arr[i], loc_size_arr[k]);"
+    NL, "        int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "        if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "      }"
+    NL, "    }"
+    NL, "  }"
+    NL, "}"
+    NL
-static const char *helper_ndrange_1d_ofs[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[(get_global_offset(0) + "
-    "get_global_linear_id()) % len], 1u, memory_order_relaxed, "
-    "memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_1d_ofs(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
-    "atomic_uint* val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int l = 0; l < n; l++)" NL,
-    "  {" NL,
-    "    for(int k = 0; k < n; k++)" NL,
-    "    {" NL,
-    "      for(int i = 0; i < n; i++)" NL,
-    "      {" NL,
-    "        if (glob_size_arr[i] >= loc_size_arr[k])" NL,
-    "        {" NL,
-    "          ndrange_t ndrange = ndrange_1D(ofs_arr[l], glob_size_arr[i], "
-    "loc_size_arr[k]);" NL,
-    "          int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "          if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "        }" NL,
-    "      }" NL,
-    "    }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_1d_ofs[] =
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[(get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_1d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int l = 0; l < n; l++)"
+    NL, "  {"
+    NL, "    for(int k = 0; k < n; k++)"
+    NL, "    {"
+    NL, "      for(int i = 0; i < n; i++)"
+    NL, "      {"
+    NL, "        if (glob_size_arr[i] >= loc_size_arr[k])"
+    NL, "        {"
+    NL, "          ndrange_t ndrange = ndrange_1D(ofs_arr[l], glob_size_arr[i], loc_size_arr[k]);"
+    NL, "          int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "          if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "        }"
+    NL, "      }"
+    NL, "    }"
+    NL, "  }"
+    NL, "}"
+    NL
-static const char *helper_ndrange_2d_glo[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
-    "memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int i = 0; i < n; i++)" NL,
-    "  {" NL,
-    "    size_t glob_size[2] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] "
-    "};" NL,
-    "    ndrange_t ndrange = ndrange_2D(glob_size);" NL,
-    "    int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "    if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_2d_glo[] =
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int i = 0; i < n; i++)"
+    NL, "  {"
+    NL, "    size_t glob_size[2] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] };"
+    NL, "    ndrange_t ndrange = ndrange_2D(glob_size);"
+    NL, "    int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "    if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "  }"
+    NL, "}"
+    NL
-static const char *helper_ndrange_2d_loc[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
-    "memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int k = 0; k < n; k++)" NL,
-    "  {" NL,
-    "    for(int i = 0; i < n; i++)" NL,
-    "    {" NL,
-    "      if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" NL,
-    "      {" NL,
-    "        size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % "
-    "n] };" NL,
-    "        size_t loc_size[] = { 1, loc_size_arr[k] };" NL,
-    "" NL,
-    "        ndrange_t ndrange = ndrange_2D(glob_size, loc_size);" NL,
-    "        int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "        if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "      }" NL,
-    "    }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_2d_loc[] =
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int k = 0; k < n; k++)"
+    NL, "  {"
+    NL, "    for(int i = 0; i < n; i++)"
+    NL, "    {"
+    NL, "      if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])"
+    NL, "      {"
+    NL, "        size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] };"
+    NL, "        size_t loc_size[] = { 1, loc_size_arr[k] };"
+    NL, ""
+    NL, "        ndrange_t ndrange = ndrange_2D(glob_size, loc_size);"
+    NL, "        int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "        if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "      }"
+    NL, "    }"
+    NL, "  }"
+    NL, "}"
+    NL
-static const char *helper_ndrange_2d_ofs[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[(get_global_offset(1) * "
-    "get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % "
-    "len], 1u, memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int l = 0; l < n; l++)" NL,
-    "  {" NL,
-    "    for(int k = 0; k < n; k++)" NL,
-    "    {" NL,
-    "      for(int i = 0; i < n; i++)" NL,
-    "      {" NL,
-    "        if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" NL,
-    "        {" NL,
-    "          size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) "
-    "% n]};" NL,
-    "          size_t loc_size[] = { 1, loc_size_arr[k] };" NL,
-    "          size_t ofs[] = { ofs_arr[l], ofs_arr[(l + 1) % n] };" NL,
-    "" NL,
-    "          ndrange_t ndrange = ndrange_2D(ofs,glob_size,loc_size);" NL,
-    "          int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "          if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "        }" NL,
-    "      }" NL,
-    "    }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_2d_ofs[] =
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[(get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int l = 0; l < n; l++)"
+    NL, "  {"
+    NL, "    for(int k = 0; k < n; k++)"
+    NL, "    {"
+    NL, "      for(int i = 0; i < n; i++)"
+    NL, "      {"
+    NL, "        if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])"
+    NL, "        {"
+    NL, "          size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n]};"
+    NL, "          size_t loc_size[] = { 1, loc_size_arr[k] };"
+    NL, "          size_t ofs[] = { ofs_arr[l], ofs_arr[(l + 1) % n] };"
+    NL, ""
+    NL, "          ndrange_t ndrange = ndrange_2D(ofs,glob_size,loc_size);"
+    NL, "          int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "          if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "        }"
+    NL, "      }"
+    NL, "    }"
+    NL, "  }"
+    NL, "}"
+    NL
-static const char *helper_ndrange_3d_glo[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
-    "memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int i = 0; i < n; i++)" NL,
-    "  {" NL,
-    "    uint global_work_size = glob_size_arr[i] *  glob_size_arr[(i + 1) % "
-    "n] * glob_size_arr[(i + 2) % n];" NL,
-    "    if (global_work_size <= (len * len))" NL,
-    "    {" NL,
-    "      size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % "
-    "n], glob_size_arr[(i + 2) % n] };" NL,
-    "      ndrange_t ndrange = ndrange_3D(glob_size);" NL,
-    "      int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "      if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "    }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_3d_glo[] =
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int i = 0; i < n; i++)"
+    NL, "  {"
+    NL, "    uint global_work_size = glob_size_arr[i] *  glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
+    NL, "    if (global_work_size <= (len * len))"
+    NL, "    {"
+    NL, "      size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };"
+    NL, "      ndrange_t ndrange = ndrange_3D(glob_size);"
+    NL, "      int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "      if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "    }"
+    NL, "  }"
+    NL, "}"
+    NL
-static const char *helper_ndrange_3d_loc[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1u, "
-    "memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int k = 0; k < n; k++)" NL,
-    "  {" NL,
-    "    for(int i = 0; i < n; i++)" NL,
-    "    {" NL,
-    "      uint global_work_size = glob_size_arr[i] *  glob_size_arr[(i + 1) % "
-    "n] * glob_size_arr[(i + 2) % n];" NL,
-    "      if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && "
-    "global_work_size <= (len * len))" NL,
-    "      {" NL,
-    "        size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % "
-    "n], glob_size_arr[(i + 2) % n] };" NL,
-    "        size_t loc_size[] = { 1, 1, loc_size_arr[k] };" NL,
-    "        ndrange_t ndrange = ndrange_3D(glob_size,loc_size);" NL,
-    "        int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "      " NL,
-    "        if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "      }" NL,
-    "    }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_3d_loc[] =
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int k = 0; k < n; k++)"
+    NL, "  {"
+    NL, "    for(int i = 0; i < n; i++)"
+    NL, "    {"
+    NL, "      uint global_work_size = glob_size_arr[i] *  glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
+    NL, "      if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))"
+    NL, "      {"
+    NL, "        size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };"
+    NL, "        size_t loc_size[] = { 1, 1, loc_size_arr[k] };"
+    NL, "        ndrange_t ndrange = ndrange_3D(glob_size,loc_size);"
+    NL, "        int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "      "
+    NL, "        if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "      }"
+    NL, "    }"
+    NL, "  }"
+    NL, "}"
+    NL
-static const char *helper_ndrange_3d_ofs[] = {
-    NL,
-    "void block_fn(int len, __global atomic_uint* val)" NL,
-    "{" NL,
-    "  atomic_fetch_add_explicit(&val[(get_global_offset(2) * "
-    "get_global_size(0) * get_global_size(1) + get_global_offset(1) * "
-    "get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % "
-    "len], 1u, memory_order_relaxed, memory_scope_device);" NL,
-    "}" NL,
-    "" NL,
-    "kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
-    "{" NL,
-    "  size_t tid = get_global_id(0);" NL,
-    "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
-    "" NL,
-    "  for(int l = 0; l < n; l++)" NL,
-    "  {" NL,
-    "    for(int k = 0; k < n; k++)" NL,
-    "    {" NL,
-    "      for(int i = 0; i < n; i++)" NL,
-    "      {" NL,
-    "        uint global_work_size = glob_size_arr[i] *  glob_size_arr[(i + 1) "
-    "% n] * glob_size_arr[(i + 2) % n];" NL,
-    "        if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && "
-    "global_work_size <= (len * len))" NL,
-    "        {" NL,
-    "          size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) "
-    "% n], glob_size_arr[(i + 2) % n]};" NL,
-    "          size_t loc_size[3] = { 1, 1, loc_size_arr[k] };" NL,
-    "          size_t ofs[3] = { ofs_arr[l], ofs_arr[(l + 1) % n], ofs_arr[(l "
-    "+ 2) % n] };" NL,
-    "          ndrange_t ndrange = ndrange_3D(ofs,glob_size,loc_size);" NL,
-    "          int enq_res = enqueue_kernel(get_default_queue(), "
-    "CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" NL,
-    "          if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" NL,
-    "        }" NL,
-    "      }" NL,
-    "    }" NL,
-    "  }" NL,
-    "}" NL
+static const char* helper_ndrange_3d_ofs[] =
+    NL, "void block_fn(int len, __global atomic_uint* val)"
+    NL, "{"
+    NL, "  atomic_fetch_add_explicit(&val[(get_global_offset(2) * get_global_size(0) * get_global_size(1) + get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);"
+    NL, "}"
+    NL, ""
+    NL, "kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val,  __global uint* ofs_arr)"
+    NL, "{"
+    NL, "  size_t tid = get_global_id(0);"
+    NL, "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };"
+    NL, ""
+    NL, "  for(int l = 0; l < n; l++)"
+    NL, "  {"
+    NL, "    for(int k = 0; k < n; k++)"
+    NL, "    {"
+    NL, "      for(int i = 0; i < n; i++)"
+    NL, "      {"
+    NL, "        uint global_work_size = glob_size_arr[i] *  glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];"
+    NL, "        if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))"
+    NL, "        {"
+    NL, "          size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n]};"
+    NL, "          size_t loc_size[3] = { 1, 1, loc_size_arr[k] };"
+    NL, "          size_t ofs[3] = { ofs_arr[l], ofs_arr[(l + 1) % n], ofs_arr[(l + 2) % n] };"
+    NL, "          ndrange_t ndrange = ndrange_3D(ofs,glob_size,loc_size);"
+    NL, "          int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
+    NL, "          if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
+    NL, "        }"
+    NL, "      }"
+    NL, "    }"
+    NL, "  }"
+    NL, "}"
+    NL
 static const kernel_src_dim_check sources_ndrange_Xd[] =
diff --git a/test_conformance/device_execution/enqueue_profiling.cpp b/test_conformance/device_execution/enqueue_profiling.cpp
deleted file mode 100644
index b9e1a17..0000000
--- a/test_conformance/device_execution/enqueue_profiling.cpp
+++ /dev/null
@@ -1,160 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include <stdio.h>
-#include <string.h>
-#include "harness/testHarness.h"
-#include "harness/typeWrappers.h"
-#include <vector>
-#include "procs.h"
-#include "utils.h"
-#include <time.h>
-static int max_nestingLevel = 10;
-static const char* enqueue_multi_level = R"(
-    void block_fn(__global int* res, int level)
-    {
-      queue_t def_q = get_default_queue();
-      if(--level < 0) return;
-      void (^kernelBlock)(void) = ^{ block_fn(res, level); };
-      ndrange_t ndrange = ndrange_1D(1);
-      int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
-      if(enq_res != CLK_SUCCESS) { (*res) = -1; return; }
-      else if(*res != -1) { (*res)++; }
-    }
-    kernel void enqueue_multi_level(__global int* res, int level)
-    {
-      *res = 0;
-      block_fn(res, level);
-    })";
-int test_enqueue_profiling(cl_device_id device, cl_context context,
-                           cl_command_queue queue, int num_elements)
-    cl_int err_ret, res = 0;
-    clCommandQueueWrapper dev_queue;
-    clCommandQueueWrapper host_queue;
-    cl_uint maxQueueSize = 0;
-    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE,
-                              sizeof(maxQueueSize), &maxQueueSize, 0);
-    test_error(err_ret,
-               "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");
-    cl_queue_properties dev_queue_prop_def[] = {
-        CL_QUEUE_SIZE, maxQueueSize, 0
-    };
-    dev_queue = clCreateCommandQueueWithProperties(
-        context, device, dev_queue_prop_def, &err_ret);
-    test_error(err_ret,
-               "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_"
-               "DEFAULT) failed");
-    cl_queue_properties host_queue_prop_def[] = { CL_QUEUE_PROPERTIES,
-                                                  CL_QUEUE_PROFILING_ENABLE,
-                                                  0 };
-    host_queue = clCreateCommandQueueWithProperties(
-        context, device, host_queue_prop_def, &err_ret);
-    test_error(err_ret,
-               "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_"
-               "DEFAULT) failed");
-    cl_int status;
-    size_t size = 1;
-    cl_int result = 0;
-    clMemWrapper res_mem;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    cl_event kernel_event;
-    err_ret = create_single_kernel_helper(context, &program, &kernel, 1,
-                                          &enqueue_multi_level,
-                                          "enqueue_multi_level");
-    if (check_error(err_ret, "Create single kernel failed")) return -1;
-    res_mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                             sizeof(result), &result, &err_ret);
-    test_error(err_ret, "clCreateBuffer() failed");
-    err_ret = clSetKernelArg(kernel, 0, sizeof(res_mem), &res_mem);
-    test_error(err_ret, "clSetKernelArg(0) failed");
-    for (int level = 0; level < max_nestingLevel; level++)
-    {
-        err_ret = clSetKernelArg(kernel, 1, sizeof(level), &level);
-        test_error(err_ret, "clSetKernelArg(1) failed");
-        err_ret = clEnqueueNDRangeKernel(host_queue, kernel, 1, NULL, &size,
-                                         &size, 0, NULL, &kernel_event);
-        test_error(err_ret,
-                   "clEnqueueNDRangeKernel('enqueue_multi_level') failed");
-        err_ret = clEnqueueReadBuffer(host_queue, res_mem, CL_TRUE, 0,
-                                      sizeof(result), &result, 0, NULL, NULL);
-        test_error(err_ret, "clEnqueueReadBuffer() failed");
-        if (result != level)
-        {
-            log_error("Kernel execution should return the maximum nesting "
-                      " level (got %d instead of %d)",
-                      result, level);
-            return -1;
-        }
-        err_ret =
-            clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS,
-                           sizeof(status), &status, NULL);
-        test_error(err_ret, "clGetEventInfo() failed");
-        if (check_error(status, "Kernel execution status %d", status))
-            return status;
-        cl_ulong end;
-        err_ret = clGetEventProfilingInfo(
-            kernel_event, CL_PROFILING_COMMAND_END, sizeof(end), &end, NULL);
-        test_error(err_ret, "clGetEventProfilingInfo() failed");
-        cl_ulong complete;
-        err_ret =
-            clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_COMPLETE,
-                                    sizeof(complete), &complete, NULL);
-        test_error(err_ret, "clGetEventProfilingInfo() failed");
-        if (end > complete)
-        {
-            log_error(
-                "Profiling END should be smaller than or equal to COMPLETE for "
-                "kernels that use the on-device queue");
-            return -1;
-        }
-        log_info("Profiling info for '%s' kernel is OK for level %d.\n",
-                 "enqueue_multi_level", level);
-        clReleaseEvent(kernel_event);
-    }
-    return res;
diff --git a/test_conformance/device_execution/host_multi_queue.cpp b/test_conformance/device_execution/host_multi_queue.cpp
index 661d33d..e9a675c 100644
--- a/test_conformance/device_execution/host_multi_queue.cpp
+++ b/test_conformance/device_execution/host_multi_queue.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -184,11 +184,7 @@
                 global = 16;
-            err_ret |= create_single_kernel_helper(
-                context, &program[i], &kernel[i],
-                sources_multi_queue_block[i].num_lines,
-                sources_multi_queue_block[i].lines,
-                sources_multi_queue_block[i].kernel_name);
+            err_ret |= create_single_kernel_helper_with_build_options(context, &program[i], &kernel[i], sources_multi_queue_block[i].num_lines, sources_multi_queue_block[i].lines, sources_multi_queue_block[i].kernel_name, "-cl-std=CL2.0");
             if(check_error(err_ret, "Create single kernel failed")) { res = -1; break; }
             mem[i] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(kernel_results), kernel_results, &err_ret);
diff --git a/test_conformance/device_execution/host_queue_order.cpp b/test_conformance/device_execution/host_queue_order.cpp
index 2b5688d..5dce160 100644
--- a/test_conformance/device_execution/host_queue_order.cpp
+++ b/test_conformance/device_execution/host_queue_order.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -133,14 +133,10 @@
     cl_event kernel_event;
-    err_ret = create_single_kernel_helper(
-        context, &program1, &kernel1, arr_size(enqueue_block_first_kernel),
-        enqueue_block_first_kernel, "enqueue_block_first_kernel");
+    err_ret = create_single_kernel_helper_with_build_options(context, &program1, &kernel1,  arr_size(enqueue_block_first_kernel), enqueue_block_first_kernel, "enqueue_block_first_kernel", "-cl-std=CL2.0");
     if(check_error(err_ret, "Create single kernel failed")) return -1;
-    err_ret = create_single_kernel_helper(
-        context, &program2, &kernel2, arr_size(enqueue_block_second_kernel),
-        enqueue_block_second_kernel, "enqueue_block_second_kernel");
+    err_ret = create_single_kernel_helper_with_build_options(context, &program2, &kernel2, arr_size(enqueue_block_second_kernel), enqueue_block_second_kernel, "enqueue_block_second_kernel", "-cl-std=CL2.0");
     if(check_error(err_ret, "Create single kernel failed")) return -1;
     res_mem = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, sizeof(kernel_results), kernel_results, &err_ret);
diff --git a/test_conformance/device_execution/main.cpp b/test_conformance/device_execution/main.cpp
index a3d0d8d..4c0b692 100644
--- a/test_conformance/device_execution/main.cpp
+++ b/test_conformance/device_execution/main.cpp
@@ -31,22 +31,18 @@
 test_status InitCL(cl_device_id device) {
   auto version = get_device_cl_version(device);
   auto expected_min_version = Version(2, 0);
-  if (version < expected_min_version)
-  {
-      version_expected_info("Test", "OpenCL",
-                            expected_min_version.to_string().c_str(),
-                            version.to_string().c_str());
-      return TEST_SKIP;
+  if (version < expected_min_version) {
+    version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
+    return TEST_SKIP;
   int error;
   cl_uint max_queues_size;
   error = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES,
                           sizeof(max_queues_size), &max_queues_size, NULL);
-  if (error != CL_SUCCESS)
-  {
-      print_error(error, "Unable to get max queues on device");
-      return TEST_FAIL;
+  if (error != CL_SUCCESS) {
+    print_error(error, "Unable to get max queues on device");
+    return TEST_FAIL;
   if ((max_queues_size == 0) && (version >= Version(3, 0)))
@@ -58,12 +54,17 @@
 test_definition test_list[] = {
-    ADD_TEST(device_info),           ADD_TEST(device_queue),
-    ADD_TEST(execute_block),         ADD_TEST(enqueue_block),
-    ADD_TEST(enqueue_nested_blocks), ADD_TEST(enqueue_wg_size),
-    ADD_TEST(enqueue_flags),         ADD_TEST(enqueue_multi_queue),
-    ADD_TEST(host_multi_queue),      ADD_TEST(enqueue_ndrange),
-    ADD_TEST(host_queue_order),      ADD_TEST(enqueue_profiling),
+    ADD_TEST( device_info ),
+    ADD_TEST( device_queue ),
+    ADD_TEST( execute_block ),
+    ADD_TEST( enqueue_block ),
+    ADD_TEST( enqueue_nested_blocks ),
+    ADD_TEST( enqueue_wg_size ),
+    ADD_TEST( enqueue_flags ),
+    ADD_TEST( enqueue_multi_queue ),
+    ADD_TEST( host_multi_queue ),
+    ADD_TEST( enqueue_ndrange ),
+    ADD_TEST( host_queue_order ),
 const int test_num = ARRAY_SIZE( test_list );
diff --git a/test_conformance/device_execution/procs.h b/test_conformance/device_execution/procs.h
index 087dafc..8f668ed 100644
--- a/test_conformance/device_execution/procs.h
+++ b/test_conformance/device_execution/procs.h
@@ -26,8 +26,6 @@
 extern int test_host_multi_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 extern int test_enqueue_ndrange(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 extern int test_host_queue_order(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_enqueue_profiling(cl_device_id device, cl_context context,
-                                  cl_command_queue queue, int num_elements);
 extern int test_execution_stress(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
diff --git a/test_conformance/device_execution/utils.cpp b/test_conformance/device_execution/utils.cpp
index 05b6949..66a2211 100644
--- a/test_conformance/device_execution/utils.cpp
+++ b/test_conformance/device_execution/utils.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -40,8 +40,7 @@
     cl_uint i;
     size_t ret_len;
-    err_ret = create_single_kernel_helper(context, &program, &kernel, num_lines,
-                                          source, kernel_name);
+    err_ret = create_single_kernel_helper_with_build_options(context, &program, &kernel, num_lines, source, kernel_name, "-cl-std=CL2.0");
     if(check_error(err_ret, "Create single kernel failed")) return -1;
     mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, res_size, results, &err_ret);
diff --git a/test_conformance/device_partition/main.cpp b/test_conformance/device_partition/main.cpp
index a8af6ff..f5f081e 100644
--- a/test_conformance/device_partition/main.cpp
+++ b/test_conformance/device_partition/main.cpp
@@ -41,5 +41,5 @@
 int main(int argc, const char *argv[])
-    return runTestHarness(argc, argv, test_num, test_list, true, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, true, 0 );
diff --git a/test_conformance/device_partition/test_device_partition.cpp b/test_conformance/device_partition/test_device_partition.cpp
index f9952ec..b90fca8 100644
--- a/test_conformance/device_partition/test_device_partition.cpp
+++ b/test_conformance/device_partition/test_device_partition.cpp
@@ -265,8 +265,7 @@
     for( i = 0; i < TEST_SIZE; i++ )
         data[i] = genrand_int32(seed);
-    stream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                            sizeof(cl_int) * TEST_SIZE, data, &error);
+    stream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * TEST_SIZE, data, &error);
     test_error( error, "Unable to create test array" );
     // Update the expected results
diff --git a/test_conformance/device_timer/main.cpp b/test_conformance/device_timer/main.cpp
index 1c460af..9539143 100644
--- a/test_conformance/device_timer/main.cpp
+++ b/test_conformance/device_timer/main.cpp
@@ -30,37 +30,34 @@
     ADD_TEST( device_and_host_timers ),
-test_status InitCL(cl_device_id device)
-    auto version = get_device_cl_version(device);
-    auto expected_min_version = Version(2, 1);
-    cl_platform_id platform;
-    cl_ulong timer_res;
-    cl_int error;
+test_status InitCL(cl_device_id device) {
+	auto version = get_device_cl_version(device);
+	auto expected_min_version = Version(2, 1);
+	cl_platform_id platform;
+	cl_ulong timer_res;
+	cl_int error;
-    if (version < expected_min_version)
-    {
-        version_expected_info("Test", "OpenCL",
-                              expected_min_version.to_string().c_str(),
-                              version.to_string().c_str());
-        return TEST_SKIP;
-    }
+	if (version < expected_min_version)
+	{
+		version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
+		return TEST_SKIP;
+	}
-    error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
-                            &platform, NULL);
-    if (error != CL_SUCCESS)
-    {
-        print_error(error, "Unable to get device platform");
-        return TEST_FAIL;
-    }
+	error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
+	                        sizeof(platform), &platform, NULL);
+	if (error != CL_SUCCESS)
+	{
+		print_error(error, "Unable to get device platform");
+		return TEST_FAIL;
+	}
-    error = clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION,
-                              sizeof(timer_res), &timer_res, NULL);
-    if (error != CL_SUCCESS)
-    {
-        print_error(error, "Unable to get host timer capabilities");
-        return TEST_FAIL;
-    }
+	error = clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION,
+	                          sizeof(timer_res), &timer_res, NULL);
+	if (error != CL_SUCCESS)
+	{
+		print_error(error, "Unable to get host timer capabilities");
+		return TEST_FAIL;
+	}
     if ((timer_res == 0) && (version >= Version(3, 0)))
diff --git a/test_conformance/events/action_classes.cpp b/test_conformance/events/action_classes.cpp
index d70d76b..122c21f 100644
--- a/test_conformance/events/action_classes.cpp
+++ b/test_conformance/events/action_classes.cpp
@@ -145,11 +145,9 @@
     error = get_max_common_work_group_size( context, mKernel, threads[0], &mLocalThreads[0] );
     test_error( error, "Unable to get work group size to use" );
-    mStreams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                 sizeof(cl_float) * 1000, NULL, &error);
+    mStreams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1000, NULL, &error );
     test_error( error, "Creating test array failed" );
-    mStreams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                 sizeof(cl_int) * 1000, NULL, &error);
+    mStreams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 1000, NULL, &error );
     test_error( error, "Creating test array failed" );
     /* Set the arguments */
diff --git a/test_conformance/events/main.cpp b/test_conformance/events/main.cpp
index 777d2d3..2aafb0e 100644
--- a/test_conformance/events/main.cpp
+++ b/test_conformance/events/main.cpp
@@ -62,6 +62,6 @@
 int main(int argc, const char *argv[])
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
diff --git a/test_conformance/events/test_event_dependencies.cpp b/test_conformance/events/test_event_dependencies.cpp
index 4113654..0ab0f2a 100644
--- a/test_conformance/events/test_event_dependencies.cpp
+++ b/test_conformance/events/test_event_dependencies.cpp
@@ -203,7 +203,7 @@
     // then incremented to 5s, repeatedly. Otherwise the values may be 2s (if the first one doesn't work) or 8s
     // (if the second one doesn't work).
     if (RANDOMIZE)
-        log_info("Queues chosen randomly for each kernel execution.\n");
+        log_info("Queues chosen randomly for each kernel exection.\n");
         log_info("Queues chosen alternatily for each kernel execution.\n");
diff --git a/test_conformance/events/test_events.cpp b/test_conformance/events/test_events.cpp
index 26693f9..c2524b1 100644
--- a/test_conformance/events/test_events.cpp
+++ b/test_conformance/events/test_events.cpp
@@ -47,11 +47,9 @@
     error = get_max_common_work_group_size( inContext, *outKernel, threads[0], &localThreads[0] );
     test_error( error, "Unable to get work group size to use" );
-    streams[0] = clCreateBuffer(inContext, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * 1000, NULL, &error);
+    streams[0] = clCreateBuffer(inContext, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1000, NULL, &error);
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(inContext, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * 1000, NULL, &error);
+    streams[1] = clCreateBuffer(inContext, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 1000, NULL, &error);
     test_error( error, "Creating test array failed" );
     /* Set the arguments */
@@ -180,8 +178,7 @@
     cl_int status;
-    stream = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                            sizeof(cl_float) * 1024 * 32, NULL, &error);
+    stream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1024 * 32, NULL, &error );
     test_error( error, "Creating test array failed" );
     error = clEnqueueWriteBuffer(queue, stream, CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)testArray, 0, NULL, &event);
@@ -215,8 +212,7 @@
     cl_int status;
-    stream = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                            sizeof(cl_float) * 1024 * 32, NULL, &error);
+    stream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1024 * 32, NULL, &error );
     test_error( error, "Creating test array failed" );
     error = clEnqueueReadBuffer(queue, stream, CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)testArray, 0, NULL, &event);
@@ -286,11 +282,9 @@
     cl_int status;
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * 1024 * 32, NULL, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1024 * 32, NULL, &error );
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * 1024 * 32, NULL, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1024 * 32, NULL, &error );
     test_error( error, "Creating test array failed" );
     error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)readArray, 0, NULL, &events[0]);
@@ -427,11 +421,9 @@
     cl_int status;
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * 1024 * 32, NULL, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1024 * 32, NULL, &error );
     test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * 1024 * 32, NULL, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 1024 * 32, NULL, &error );
     test_error( error, "Creating test array failed" );
     error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)readArray, 0, NULL, &events[0]);
@@ -517,12 +509,8 @@
     // Create a set of streams to use as arguments
     for( i = 0; i < NUM_EVENT_RUNS; i++ )
-        streams[i][0] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * threads[0], NULL, &error);
-        streams[i][1] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_int) * threads[0], NULL, &error);
+        streams[i][0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * threads[0], NULL, &error );
+        streams[i][1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * threads[0], NULL, &error );
         if( ( streams[i][0] == NULL ) || ( streams[i][1] == NULL ) )
             log_error( "ERROR: Unable to allocate testing streams" );
diff --git a/test_conformance/events/test_userevents.cpp b/test_conformance/events/test_userevents.cpp
index 0a4954f..7ad348f 100644
--- a/test_conformance/events/test_userevents.cpp
+++ b/test_conformance/events/test_userevents.cpp
@@ -207,10 +207,6 @@
     log_info("Unsuccessful user event case passed.\n");
-  clReleaseKernel(k0);
-  clReleaseProgram(program);
-  clReleaseMemObject(output);
   return 0;
diff --git a/test_conformance/events/test_userevents_multithreaded.cpp b/test_conformance/events/test_userevents_multithreaded.cpp
index 51ef222..2340790 100644
--- a/test_conformance/events/test_userevents_multithreaded.cpp
+++ b/test_conformance/events/test_userevents_multithreaded.cpp
@@ -16,19 +16,28 @@
 #include "testBase.h"
 #include "action_classes.h"
 #include "harness/conversions.h"
-#include <thread>
+#include "harness/genericThread.h"
 #if !defined (_MSC_VER)
     #include <unistd.h>
 #endif // !_MSC_VER
-void trigger_user_event(cl_event *event)
+class releaseEvent_thread : public genericThread
-    usleep(1000000);
-    log_info("\tTriggering gate from separate thread...\n");
-    clSetUserEventStatus(*event, CL_COMPLETE);
+    public:
+        releaseEvent_thread( cl_event *event ) : mEvent( event ) {}
+        cl_event * mEvent;
+    protected:
+        virtual void *    IRun( void )
+        {
+            usleep( 1000000 );
+            log_info( "\tTriggering gate from separate thread...\n" );
+            clSetUserEventStatus( *mEvent, CL_COMPLETE );
+            return NULL;
+        }
 int test_userevents_multithreaded( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
@@ -57,14 +66,14 @@
     // Now, instead of releasing the gate, we spawn a separate thread to do so
+    releaseEvent_thread thread( &gateEvent );
     log_info( "\tStarting trigger thread...\n" );
-    std::thread thread(trigger_user_event, &gateEvent);
+    thread.Start();
     log_info( "\tWaiting for actions...\n" );
     error = clWaitForEvents( 3, &actionEvents[ 0 ] );
     test_error( error, "Unable to wait for action events" );
-    thread.join();
     log_info( "\tActions completed.\n" );
     // If we got here without error, we're good
diff --git a/test_conformance/extensions/CMakeLists.txt b/test_conformance/extensions/CMakeLists.txt
deleted file mode 100644
index 53d77ee..0000000
--- a/test_conformance/extensions/CMakeLists.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-add_subdirectory( cl_ext_cxx_for_opencl )
-add_subdirectory( cl_khr_dx9_media_sharing )
diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/CMakeLists.txt b/test_conformance/extensions/cl_ext_cxx_for_opencl/CMakeLists.txt
deleted file mode 100644
index fd397c3..0000000
--- a/test_conformance/extensions/cl_ext_cxx_for_opencl/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-    main.cpp
-    cxx_for_opencl_ext.cpp
-    cxx_for_opencl_ver.cpp
diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp b/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp
deleted file mode 100644
index 1d5252c..0000000
--- a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "procs.h"
-int test_cxx_for_opencl(cl_device_id device, cl_context context,
-                        cl_command_queue queue)
-    cl_int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel1;
-    clKernelWrapper kernel2;
-    clMemWrapper in_buffer;
-    clMemWrapper out_buffer;
-    cl_int value = 7;
-    const char *kernel_sstr =
-        R"(
-        __global int x;
-        template<typename T>
-        void execute(T &a, const T &b) {
-            a = b * 2;
-        }
-        __kernel void k1(__global int *p) {
-            execute(x, *p);
-        }
-        __kernel void k2(__global int *p) {
-            execute(*p, x);
-        })";
-    error = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel1, 1, &kernel_sstr, "k1", "-cl-std=CLC++");
-    test_error(error, "Failed to create k1 kernel");
-    kernel2 = clCreateKernel(program, "k2", &error);
-    test_error(error, "Failed to create k2 kernel");
-    in_buffer =
-        clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                       sizeof(value), &value, &error);
-    test_error(error, "clCreateBuffer failed");
-    out_buffer =
-        clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                       sizeof(value), &value, &error);
-    test_error(error, "clCreateBuffer failed");
-    error = clSetKernelArg(kernel1, 0, sizeof(in_buffer), &in_buffer);
-    test_error(error, "clSetKernelArg failed");
-    error = clSetKernelArg(kernel2, 0, sizeof(out_buffer), &out_buffer);
-    test_error(error, "clSetKernelArg failed");
-    size_t global_size = 1;
-    error = clEnqueueNDRangeKernel(queue, kernel1, 1, nullptr, &global_size,
-                                   nullptr, 0, nullptr, nullptr);
-    test_error(error, "clEnqueueNDRangeKernel failed");
-    error = clEnqueueNDRangeKernel(queue, kernel2, 1, nullptr, &global_size,
-                                   nullptr, 0, nullptr, nullptr);
-    test_error(error, "clEnqueueNDRangeKernel failed");
-    error = clEnqueueReadBuffer(queue, out_buffer, CL_BLOCKING, 0,
-                                sizeof(value), &value, 0, nullptr, nullptr);
-    test_error(error, "clEnqueueReadBuffer failed");
-    error = clFinish(queue);
-    test_error(error, "clFinish failed");
-    if (value != 28)
-    {
-        log_error("ERROR: Kernel wrote %lu, expected 28\n",
-                  static_cast<long unsigned>(value));
-        return TEST_FAIL;
-    }
-    return TEST_PASS;
-int test_cxx_for_opencl_ext(cl_device_id device, cl_context context,
-                            cl_command_queue queue, int)
-    if (!is_extension_available(device, "cl_ext_cxx_for_opencl"))
-    {
-        log_info("Device does not support 'cl_ext_cxx_for_opencl'. Skipping "
-                 "the test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-    return test_cxx_for_opencl(device, context, queue);
diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ver.cpp b/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ver.cpp
deleted file mode 100644
index 0376081..0000000
--- a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ver.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "procs.h"
-int test_cxx_for_opencl_version(cl_device_id device, cl_context context,
-                                cl_command_queue queue)
-    cl_int cxx4opencl_version;
-    cl_int cxx4opencl_expected_version;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    cl_int error;
-    cl_int value = 0;
-    const char *kernel_sstr =
-        R"(
-        __kernel void k(__global int* buf) {
-            buf[0] = __OPENCL_CPP_VERSION__;
-        })";
-    const size_t lengths[1] = { std::string{ kernel_sstr }.size() };
-    clProgramWrapper writer_program =
-        clCreateProgramWithSource(context, 1, &kernel_sstr, lengths, &error);
-    test_error(error, "Failed to create program with source");
-    error = clCompileProgram(writer_program, 1, &device, "-cl-std=CLC++", 0,
-                             nullptr, nullptr, nullptr, nullptr);
-    test_error(error, "Failed to compile program");
-    cl_program progs[1] = { writer_program };
-    program = clLinkProgram(context, 1, &device, "", 1, progs, 0, 0, &error);
-    test_error(error, "Failed to link program");
-    clMemWrapper out =
-        clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                       sizeof(cxx4opencl_version), &cxx4opencl_version, &error);
-    test_error(error, "clCreateBuffer failed");
-    kernel = clCreateKernel(program, "k", &error);
-    test_error(error, "Failed to create k kernel");
-    error = clSetKernelArg(kernel, 0, sizeof(out), &out);
-    test_error(error, "clSetKernelArg failed");
-    size_t global_size = 1;
-    error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &global_size,
-                                   nullptr, 0, nullptr, nullptr);
-    test_error(error, "clEnqueueNDRangeKernel failed");
-    error = clEnqueueReadBuffer(queue, out, CL_BLOCKING, 0,
-                                sizeof(cxx4opencl_version), &cxx4opencl_version,
-                                0, nullptr, nullptr);
-    test_error(error, "clEnqueueReadBuffer failed");
-    error = clFinish(queue);
-    test_error(error, "clFinish failed");
-    error =
-                        sizeof(value), &value, nullptr);
-    test_error(error, "Failed to get device info");
-    cxx4opencl_expected_version = CL_VERSION_MAJOR_KHR(value) * 100
-        + CL_VERSION_MINOR_KHR(value) * 10 + CL_VERSION_PATCH_KHR(value);
-    if (cxx4opencl_version != cxx4opencl_expected_version)
-    {
-        log_error("ERROR: C++ for OpenCL version mismatch - returned %lu, "
-                  "expected %lu\n",
-                  static_cast<long unsigned>(value),
-                  static_cast<long unsigned>(cxx4opencl_expected_version));
-        return TEST_FAIL;
-    }
-    return TEST_PASS;
-int test_cxx_for_opencl_ver(cl_device_id device, cl_context context,
-                            cl_command_queue queue, int)
-    if (!is_extension_available(device, "cl_ext_cxx_for_opencl"))
-    {
-        log_info("Device does not support 'cl_ext_cxx_for_opencl'. Skipping "
-                 "the test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-    return test_cxx_for_opencl_version(device, context, queue);
diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/main.cpp b/test_conformance/extensions/cl_ext_cxx_for_opencl/main.cpp
deleted file mode 100644
index 5e8c14a..0000000
--- a/test_conformance/extensions/cl_ext_cxx_for_opencl/main.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "procs.h"
-test_definition test_list[] = {
-    ADD_TEST_VERSION(cxx_for_opencl_ext, Version(2, 0)),
-    ADD_TEST_VERSION(cxx_for_opencl_ver, Version(2, 0))
-int main(int argc, const char *argv[])
-    return runTestHarnessWithCheck(argc, argv, ARRAY_SIZE(test_list), test_list,
-                                   false, 0, nullptr);
diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/procs.h b/test_conformance/extensions/cl_ext_cxx_for_opencl/procs.h
deleted file mode 100644
index 5665e01..0000000
--- a/test_conformance/extensions/cl_ext_cxx_for_opencl/procs.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef _procs_h
-#define _procs_h
-#include "harness/typeWrappers.h"
-extern int test_cxx_for_opencl_ext(cl_device_id device, cl_context context,
-                                   cl_command_queue queue, int);
-extern int test_cxx_for_opencl_ver(cl_device_id device, cl_context context,
-                                   cl_command_queue queue, int);
-#endif /*_procs_h*/
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/CMakeLists.txt b/test_conformance/extensions/cl_khr_dx9_media_sharing/CMakeLists.txt
deleted file mode 100644
index 1ec2a33..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/CMakeLists.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-list(APPEND CLConform_LIBRARIES d3d9.lib dxva2.lib )
-        main.cpp
-        wrappers.cpp
-        utils.cpp
-        test_create_context.cpp
-        test_functions_api.cpp
-        test_functions_kernel.cpp
-        test_get_device_ids.cpp
-        test_interop_sync.cpp
-        test_memory_access.cpp
-        test_other_data_types.cpp
-    )
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/main.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/main.cpp
deleted file mode 100644
index 8b70917..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/main.cpp
+++ /dev/null
@@ -1,231 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include <stdio.h>
-#include <stdlib.h>
-#include "harness/testHarness.h"
-#include "utils.h"
-#include "procs.h"
-test_definition test_list[] = { ADD_TEST(context_create),
-                                ADD_TEST(get_device_ids),
-                                ADD_TEST(api),
-                                ADD_TEST(kernel),
-                                ADD_TEST(other_data_types),
-                                ADD_TEST(memory_access),
-                                ADD_TEST(interop_user_sync) };
-const int test_num = ARRAY_SIZE(test_list);
-clGetDeviceIDsFromDX9MediaAdapterKHR_fn clGetDeviceIDsFromDX9MediaAdapterKHR =
-    NULL;
-clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR = NULL;
-clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR =
-    NULL;
-clEnqueueReleaseDX9MediaSurfacesKHR_fn clEnqueueReleaseDX9MediaSurfacesKHR =
-    NULL;
-cl_platform_id gPlatformIDdetected;
-cl_device_id gDeviceIDdetected;
-cl_device_type gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT;
-bool MediaSurfaceSharingExtensionInit()
-    clGetDeviceIDsFromDX9MediaAdapterKHR =
-        (clGetDeviceIDsFromDX9MediaAdapterKHR_fn)
-            clGetExtensionFunctionAddressForPlatform(
-                gPlatformIDdetected, "clGetDeviceIDsFromDX9MediaAdapterKHR");
-    if (clGetDeviceIDsFromDX9MediaAdapterKHR == NULL)
-    {
-        log_error("clGetExtensionFunctionAddressForPlatform("
-                  "clGetDeviceIDsFromDX9MediaAdapterKHR) returned NULL.\n");
-        return false;
-    }
-    clCreateFromDX9MediaSurfaceKHR = (clCreateFromDX9MediaSurfaceKHR_fn)
-        clGetExtensionFunctionAddressForPlatform(
-            gPlatformIDdetected, "clCreateFromDX9MediaSurfaceKHR");
-    if (clCreateFromDX9MediaSurfaceKHR == NULL)
-    {
-        log_error("clGetExtensionFunctionAddressForPlatform("
-                  "clCreateFromDX9MediaSurfaceKHR) returned NULL.\n");
-        return false;
-    }
-    clEnqueueAcquireDX9MediaSurfacesKHR =
-        (clEnqueueAcquireDX9MediaSurfacesKHR_fn)
-            clGetExtensionFunctionAddressForPlatform(
-                gPlatformIDdetected, "clEnqueueAcquireDX9MediaSurfacesKHR");
-    if (clEnqueueAcquireDX9MediaSurfacesKHR == NULL)
-    {
-        log_error("clGetExtensionFunctionAddressForPlatform("
-                  "clEnqueueAcquireDX9MediaSurfacesKHR) returned NULL.\n");
-        return false;
-    }
-    clEnqueueReleaseDX9MediaSurfacesKHR =
-        (clEnqueueReleaseDX9MediaSurfacesKHR_fn)
-            clGetExtensionFunctionAddressForPlatform(
-                gPlatformIDdetected, "clEnqueueReleaseDX9MediaSurfacesKHR");
-    if (clEnqueueReleaseDX9MediaSurfacesKHR == NULL)
-    {
-        log_error("clGetExtensionFunctionAddressForPlatform("
-                  "clEnqueueReleaseDX9MediaSurfacesKHR) returned NULL.\n");
-        return false;
-    }
-    return true;
-bool DetectPlatformAndDevice()
-    std::vector<cl_platform_id> platforms;
-    cl_uint platformsNum = 0;
-    cl_int error = clGetPlatformIDs(0, 0, &platformsNum);
-    if (error != CL_SUCCESS)
-    {
-        print_error(error, "clGetPlatformIDs failed\n");
-        return false;
-    }
-    platforms.resize(platformsNum);
-    error = clGetPlatformIDs(platformsNum, &platforms[0], 0);
-    if (error != CL_SUCCESS)
-    {
-        print_error(error, "clGetPlatformIDs failed\n");
-        return false;
-    }
-    bool found = false;
-    for (size_t i = 0; i < platformsNum; ++i)
-    {
-        std::vector<cl_device_id> devices;
-        cl_uint devicesNum = 0;
-        error = clGetDeviceIDs(platforms[i], gDeviceTypeSelected, 0, 0,
-                               &devicesNum);
-        if (error != CL_SUCCESS)
-        {
-            print_error(error, "clGetDeviceIDs failed\n");
-            return false;
-        }
-        devices.resize(devicesNum);
-        error = clGetDeviceIDs(platforms[i], gDeviceTypeSelected, devicesNum,
-                               &devices[0], 0);
-        if (error != CL_SUCCESS)
-        {
-            print_error(error, "clGetDeviceIDs failed\n");
-            return false;
-        }
-        for (size_t j = 0; j < devicesNum; ++j)
-        {
-            if (is_extension_available(devices[j], "cl_khr_dx9_media_sharing"))
-            {
-                gPlatformIDdetected = platforms[i];
-                gDeviceIDdetected = devices[j];
-                found = true;
-                break;
-            }
-        }
-    }
-    if (!found)
-    {
-        log_info("Test was not run, because the media surface sharing "
-                 "extension is not supported for any devices.\n");
-        return false;
-    }
-    return true;
-bool CmdlineParse(int argc, const char *argv[])
-    char *env_mode = getenv("CL_DEVICE_TYPE");
-    if (env_mode != NULL)
-    {
-        if (strcmp(env_mode, "gpu") == 0
-            || strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0)
-            gDeviceTypeSelected = CL_DEVICE_TYPE_GPU;
-        else if (strcmp(env_mode, "cpu") == 0
-                 || strcmp(env_mode, "CL_DEVICE_TYPE_CPU") == 0)
-            gDeviceTypeSelected = CL_DEVICE_TYPE_CPU;
-        else if (strcmp(env_mode, "accelerator") == 0
-                 || strcmp(env_mode, "CL_DEVICE_TYPE_ACCELERATOR") == 0)
-            gDeviceTypeSelected = CL_DEVICE_TYPE_ACCELERATOR;
-        else if (strcmp(env_mode, "default") == 0
-                 || strcmp(env_mode, "CL_DEVICE_TYPE_DEFAULT") == 0)
-            gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT;
-        else
-        {
-            log_error("Unknown CL_DEVICE_TYPE env variable setting: "
-                      "%s.\nAborting...\n",
-                      env_mode);
-            return false;
-        }
-    }
-    for (int i = 0; i < argc; ++i)
-    {
-        if (strcmp(argv[i], "gpu") == 0
-            || strcmp(argv[i], "CL_DEVICE_TYPE_GPU") == 0)
-        {
-            gDeviceTypeSelected = CL_DEVICE_TYPE_GPU;
-            continue;
-        }
-        else if (strcmp(argv[i], "cpu") == 0
-                 || strcmp(argv[i], "CL_DEVICE_TYPE_CPU") == 0)
-        {
-            gDeviceTypeSelected = CL_DEVICE_TYPE_CPU;
-            continue;
-        }
-        else if (strcmp(argv[i], "accelerator") == 0
-                 || strcmp(argv[i], "CL_DEVICE_TYPE_ACCELERATOR") == 0)
-        {
-            gDeviceTypeSelected = CL_DEVICE_TYPE_ACCELERATOR;
-            continue;
-        }
-        else if (strcmp(argv[i], "CL_DEVICE_TYPE_DEFAULT") == 0)
-        {
-            gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT;
-            continue;
-        }
-        else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0)
-        {
-            CDeviceWrapper::AccelerationType(CDeviceWrapper::ACCELERATION_SW);
-        }
-    }
-    return true;
-int main(int argc, const char *argv[])
-    if (!CmdlineParse(argc, argv)) return TEST_FAIL;
-    if (!DetectPlatformAndDevice())
-    {
-        log_info("Test was not run, because the media surface sharing "
-                 "extension is not supported\n");
-        return TEST_SKIP;
-    }
-    if (!MediaSurfaceSharingExtensionInit()) return TEST_FAIL;
-    return runTestHarness(argc, argv, test_num, test_list, true, 0);
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/procs.h b/test_conformance/extensions/cl_khr_dx9_media_sharing/procs.h
deleted file mode 100644
index e7fd785..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/procs.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright (c) 2019 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-extern int test_context_create(cl_device_id deviceID, cl_context context,
-                               cl_command_queue queue, int num_elements);
-extern int test_get_device_ids(cl_device_id deviceID, cl_context context,
-                               cl_command_queue queue, int num_elements);
-extern int test_api(cl_device_id deviceID, cl_context context,
-                    cl_command_queue queue, int num_elements);
-extern int test_kernel(cl_device_id deviceID, cl_context context,
-                       cl_command_queue queue, int num_elements);
-extern int test_other_data_types(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements);
-extern int test_memory_access(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements);
-extern int test_interop_user_sync(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements);
-#endif // #ifndef __MEDIA_SHARING_PROCS_H__
\ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_create_context.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_create_context.cpp
deleted file mode 100644
index 6033ce9..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_create_context.cpp
+++ /dev/null
@@ -1,373 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "utils.h"
-int context_create(cl_device_id deviceID, cl_context context,
-                   cl_command_queue queue, int num_elements, unsigned int width,
-                   unsigned int height, TContextFuncType functionCreate,
-                   cl_dx9_media_adapter_type_khr adapterType,
-                   TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle)
-    CResult result;
-    // create device
-    std::auto_ptr<CDeviceWrapper> deviceWrapper;
-    if (!DeviceCreate(adapterType, deviceWrapper))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-    // generate input data
-    std::vector<cl_uchar> bufferIn(width * height * 3 / 2, 0);
-    if (!YUVGenerate(surfaceFormat, bufferIn, width, height, 0, 255))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-    while (deviceWrapper->AdapterNext())
-    {
-        cl_int error;
-        // check if the test can be run on the adapter
-        if (CL_SUCCESS
-            != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType,
-                                             deviceWrapper->Device(), result,
-                                             sharedHandle)))
-        {
-            return result.Result();
-        }
-        if (surfaceFormat != SURFACE_FORMAT_NV12
-            && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
-        {
-            std::string sharedHandleStr =
-                (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-            std::string formatStr;
-            std::string adapterStr;
-            SurfaceFormatToString(surfaceFormat, formatStr);
-            AdapterToString(adapterType, adapterStr);
-            log_info(
-                "Skipping test case, image format is not supported by a device "
-                "(adapter type: %s, format: %s, shared handle: %s)\n",
-                adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
-            return result.Result();
-        }
-        void *objectSharedHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surface;
-        if (!MediaSurfaceCreate(
-                adapterType, width, height, surfaceFormat, *deviceWrapper,
-                surface, (sharedHandle == SHARED_HANDLE_ENABLED) ? true : false,
-                &objectSharedHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-        cl_context_properties contextProperties[] = {
-            (cl_context_properties)gPlatformIDdetected,
-            AdapterTypeToContextInfo(adapterType),
-            (cl_context_properties)deviceWrapper->Device(),
-            0,
-        };
-        clContextWrapper ctx;
-        switch (functionCreate)
-        {
-            case CONTEXT_CREATE_DEFAULT:
-                ctx = clCreateContext(&contextProperties[0], 1,
-                                      &gDeviceIDdetected, NULL, NULL, &error);
-                break;
-            case CONTEXT_CREATE_FROM_TYPE:
-                ctx = clCreateContextFromType(&contextProperties[0],
-                                              gDeviceTypeSelected, NULL, NULL,
-                                              &error);
-                break;
-            default:
-                log_error("Unknown context creation function enum\n");
-                result.ResultSub(CResult::TEST_ERROR);
-                return result.Result();
-                break;
-        }
-        if (error != CL_SUCCESS)
-        {
-            std::string functionName;
-            FunctionContextCreateToString(functionCreate, functionName);
-            log_error("%s failed: %s\n", functionName.c_str(),
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        if (!YUVSurfaceSet(surfaceFormat, surface, bufferIn, width, height))
-        {
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-#if defined(_WIN32)
-        cl_dx9_surface_info_khr surfaceInfo;
-        surfaceInfo.resource =
-            *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
-        surfaceInfo.shared_handle = objectSharedHandle;
-        void *surfaceInfo = 0;
-        return TEST_NOT_IMPLEMENTED;
-        std::vector<cl_mem> memObjList;
-        unsigned int planesNum = PlanesNum(surfaceFormat);
-        std::vector<clMemWrapper> planesList(planesNum);
-        for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
-        {
-            planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx,
-                &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error(
-                    "clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n",
-                    planeIdx, IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            memObjList.push_back(planesList[planeIdx]);
-        }
-        clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(
-            ctx, gDeviceIDdetected, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("Unable to create command queue: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        if (!ImageInfoVerify(adapterType, memObjList, width, height, surface,
-                             objectSharedHandle))
-        {
-            log_error("Image info verification failed\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        cl_event event;
-        error = clEnqueueAcquireDX9MediaSurfacesKHR(
-            cmdQueue, static_cast<cl_uint>(memObjList.size()),
-            &, 0, NULL, &event);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        cl_uint eventType = 0;
-        error = clGetEventInfo(event, CL_EVENT_COMMAND_TYPE, sizeof(eventType),
-                               &eventType, NULL);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clGetEventInfo failed: %s\n", IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        {
-            log_error(
-                "Invalid event != CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        clReleaseEvent(event);
-        size_t origin[3] = { 0, 0, 0 };
-        size_t offset = 0;
-        size_t frameSize = width * height * 3 / 2;
-        std::vector<cl_uchar> out(frameSize, 0);
-        for (size_t i = 0; i < memObjList.size(); ++i)
-        {
-            size_t planeWidth = (i == 0) ? width : width / 2;
-            size_t planeHeight = (i == 0) ? height : height / 2;
-            size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-            error =
-                clEnqueueReadImage(cmdQueue,, CL_TRUE, origin,
-                                   regionPlane, 0, 0, &, 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReadImage failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-            offset += planeWidth * planeHeight;
-        }
-        if (!YUVCompare(surfaceFormat, out, bufferIn, width, height))
-        {
-            log_error("OCL object verification failed - clEnqueueReadImage\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        error = clEnqueueReleaseDX9MediaSurfacesKHR(
-            cmdQueue, static_cast<cl_uint>(memObjList.size()),
-            &, 0, NULL, &event);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        eventType = 0;
-        error = clGetEventInfo(event, CL_EVENT_COMMAND_TYPE, sizeof(eventType),
-                               &eventType, NULL);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clGetEventInfo failed: %s\n", IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        {
-            log_error(
-                "Invalid event != CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        clReleaseEvent(event);
-        // object verification
-        std::vector<cl_uchar> bufferOut(frameSize, 0);
-        if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height))
-        {
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        if (!YUVCompare(surfaceFormat, bufferOut, bufferIn, width, height))
-        {
-            log_error("Media surface is different than expected\n");
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-    }
-    if (deviceWrapper->Status() != DEVICE_PASS)
-    {
-        std::string adapterName;
-        AdapterToString(adapterType, adapterName);
-        if (deviceWrapper->Status() == DEVICE_FAIL)
-        {
-            log_error("%s init failed\n", adapterName.c_str());
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        else
-        {
-            log_error("%s init incomplete due to unsupported device\n",
-                      adapterName.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-    return result.Result();
-int test_context_create(cl_device_id deviceID, cl_context context,
-                        cl_command_queue queue, int num_elements)
-    const unsigned int WIDTH = 256;
-    const unsigned int HEIGHT = 256;
-    std::vector<cl_dx9_media_adapter_type_khr> adapterTypes;
-#if defined(_WIN32)
-    adapterTypes.push_back(CL_ADAPTER_D3D9_KHR);
-    adapterTypes.push_back(CL_ADAPTER_D3D9EX_KHR);
-    adapterTypes.push_back(CL_ADAPTER_DXVA_KHR);
-    std::vector<TContextFuncType> contextFuncs;
-    contextFuncs.push_back(CONTEXT_CREATE_DEFAULT);
-    contextFuncs.push_back(CONTEXT_CREATE_FROM_TYPE);
-    std::vector<TSurfaceFormat> formats;
-    formats.push_back(SURFACE_FORMAT_NV12);
-    formats.push_back(SURFACE_FORMAT_YV12);
-    std::vector<TSharedHandleType> sharedHandleTypes;
-    sharedHandleTypes.push_back(SHARED_HANDLE_DISABLED);
-#if defined(_WIN32)
-    sharedHandleTypes.push_back(SHARED_HANDLE_ENABLED);
-    CResult result;
-    for (size_t adapterTypeIdx = 0; adapterTypeIdx < adapterTypes.size();
-         ++adapterTypeIdx)
-    {
-        // iteration through all create context functions
-        for (size_t contextFuncIdx = 0; contextFuncIdx < contextFuncs.size();
-             ++contextFuncIdx)
-        {
-            // iteration through surface formats
-            for (size_t formatIdx = 0; formatIdx < formats.size(); ++formatIdx)
-            {
-                // shared handle enabled or disabled
-                for (size_t sharedHandleIdx = 0;
-                     sharedHandleIdx < sharedHandleTypes.size();
-                     ++sharedHandleIdx)
-                {
-                    if (adapterTypes[adapterTypeIdx] == CL_ADAPTER_D3D9_KHR
-                        && sharedHandleTypes[sharedHandleIdx]
-                            == SHARED_HANDLE_ENABLED)
-                        continue;
-                    if (context_create(
-                            deviceID, context, queue, num_elements, WIDTH,
-                            HEIGHT, contextFuncs[contextFuncIdx],
-                            adapterTypes[adapterTypeIdx], formats[formatIdx],
-                            sharedHandleTypes[sharedHandleIdx])
-                        != 0)
-                    {
-                        std::string sharedHandle =
-                            (sharedHandleTypes[sharedHandleIdx]
-                             == SHARED_HANDLE_ENABLED)
-                            ? "shared handle"
-                            : "no shared handle";
-                        std::string formatStr;
-                        std::string adapterTypeStr;
-                        SurfaceFormatToString(formats[formatIdx], formatStr);
-                        AdapterToString(adapterTypes[adapterTypeIdx],
-                                        adapterTypeStr);
-                        log_error("\nTest case - clCreateContext (%s, %s, %s) "
-                                  "failed\n\n",
-                                  adapterTypeStr.c_str(), formatStr.c_str(),
-                                  sharedHandle.c_str());
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                }
-            }
-        }
-    }
-    return result.Result();
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_api.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_api.cpp
deleted file mode 100644
index ab92cb8..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_api.cpp
+++ /dev/null
@@ -1,781 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "utils.h"
-int api_functions(cl_device_id deviceID, cl_context context,
-                  cl_command_queue queue, int num_elements,
-                  unsigned int iterationNum, unsigned int width,
-                  unsigned int height,
-                  cl_dx9_media_adapter_type_khr adapterType,
-                  TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle)
-    const unsigned int FRAME_NUM = 2;
-    const cl_uchar MAX_VALUE = 255 / 2;
-    CResult result;
-    // create device
-    std::auto_ptr<CDeviceWrapper> deviceWrapper;
-    if (!DeviceCreate(adapterType, deviceWrapper))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-    // generate input and expected data
-    std::vector<std::vector<cl_uchar>> bufferRef1(FRAME_NUM);
-    std::vector<std::vector<cl_uchar>> bufferRef2(FRAME_NUM);
-    std::vector<std::vector<cl_uchar>> bufferRef3(FRAME_NUM);
-    size_t frameSize = width * height * 3 / 2;
-    cl_uchar step = MAX_VALUE / FRAME_NUM;
-    for (size_t i = 0; i < FRAME_NUM; ++i)
-    {
-        if (!YUVGenerate(surfaceFormat, bufferRef1[i], width, height,
-                         static_cast<cl_uchar>(step * i),
-                         static_cast<cl_uchar>(step * (i + 1)))
-            || !YUVGenerate(surfaceFormat, bufferRef2[i], width, height,
-                            static_cast<cl_uchar>(step * i),
-                            static_cast<cl_uchar>(step * (i + 1)), 0.2)
-            || !YUVGenerate(surfaceFormat, bufferRef3[i], width, height,
-                            static_cast<cl_uchar>(step * i),
-                            static_cast<cl_uchar>(step * (i + 1)), 0.4))
-        {
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-    }
-    // iterates through all devices
-    while (deviceWrapper->AdapterNext())
-    {
-        cl_int error;
-        // check if the test can be run on the adapter
-        if (CL_SUCCESS
-            != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType,
-                                             deviceWrapper->Device(), result,
-                                             sharedHandle)))
-        {
-            return result.Result();
-        }
-        if (surfaceFormat != SURFACE_FORMAT_NV12
-            && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
-        {
-            std::string sharedHandleStr =
-                (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-            std::string formatStr;
-            std::string adapterStr;
-            SurfaceFormatToString(surfaceFormat, formatStr);
-            AdapterToString(adapterType, adapterStr);
-            log_info(
-                "Skipping test case, image format is not supported by a device "
-                "(adapter type: %s, format: %s, shared handle: %s)\n",
-                adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
-            return result.Result();
-        }
-        void *objectSharedHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surface;
-        // create surface
-        if (!MediaSurfaceCreate(
-                adapterType, width, height, surfaceFormat, *deviceWrapper,
-                surface, (sharedHandle == SHARED_HANDLE_ENABLED) ? true : false,
-                &objectSharedHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-        cl_context_properties contextProperties[] = {
-            (cl_context_properties)gPlatformIDdetected,
-            AdapterTypeToContextInfo(adapterType),
-            (cl_context_properties)deviceWrapper->Device(),
-            0,
-        };
-        clContextWrapper ctx = clCreateContext(
-            &contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clCreateContext failed: %s\n", IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-#if defined(_WIN32)
-        cl_dx9_surface_info_khr surfaceInfo;
-        surfaceInfo.resource =
-            *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
-        surfaceInfo.shared_handle = objectSharedHandle;
-        void *surfaceInfo = 0;
-        return TEST_NOT_IMPLEMENTED;
-        std::vector<cl_mem> memObjList;
-        unsigned int planesNum = PlanesNum(surfaceFormat);
-        std::vector<clMemWrapper> planesList(planesNum);
-        for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
-        {
-            planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx,
-                &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error(
-                    "clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n",
-                    planeIdx, IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            memObjList.push_back(planesList[planeIdx]);
-        }
-        clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(
-            ctx, gDeviceIDdetected, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("Unable to create command queue: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        if (!ImageInfoVerify(adapterType, memObjList, width, height, surface,
-                             objectSharedHandle))
-        {
-            log_error("Image info verification failed\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx)
-        {
-            if (!YUVSurfaceSet(surfaceFormat, surface,
-                               bufferRef1[frameIdx % FRAME_NUM], width, height))
-            {
-                result.ResultSub(CResult::TEST_ERROR);
-                return result.Result();
-            }
-            error = clEnqueueAcquireDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            { // read operation
-                std::vector<cl_uchar> out(frameSize, 0);
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                    error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE,
-                                               origin, regionPlane, 0, 0,
-                                               &out[offset], 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueReadImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    offset += planeWidth * planeHeight;
-                }
-                if (!YUVCompare(surfaceFormat, out,
-                                bufferRef1[frameIdx % FRAME_NUM], width,
-                                height))
-                {
-                    log_error("Frame idx: %i, OCL image is different then "
-                              "shared OCL object: clEnqueueReadImage\n",
-                              frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-            { // write operation
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                    error = clEnqueueWriteImage(
-                        cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane,
-                        0, 0, &bufferRef2[frameIdx % FRAME_NUM][offset], 0, 0,
-                        0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueWriteImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    offset += planeWidth * planeHeight;
-                }
-            }
-            { // read operation
-                std::vector<cl_uchar> out(frameSize, 0);
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                    error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE,
-                                               origin, regionPlane, 0, 0,
-                                               &out[offset], 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueReadImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    offset += planeWidth * planeHeight;
-                }
-                if (!YUVCompare(surfaceFormat, out,
-                                bufferRef2[frameIdx % FRAME_NUM], width,
-                                height))
-                {
-                    log_error("Frame idx: %i, Shared OCL image verification "
-                              "after clEnqueueWriteImage failed\n",
-                              frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-            { // copy operation (shared OCL to OCL)
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                std::vector<cl_uchar> out(frameSize, 0);
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                    cl_image_format formatPlane;
-                    formatPlane.image_channel_data_type = CL_UNORM_INT8;
-                    formatPlane.image_channel_order =
-                        (surfaceFormat == SURFACE_FORMAT_NV12 && i > 0) ? CL_RG
-                                                                        : CL_R;
-                    cl_image_desc imageDesc = { 0 };
-                    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
-                    imageDesc.image_width = planeWidth;
-                    imageDesc.image_height = planeHeight;
-                    clMemWrapper planeOCL =
-                        clCreateImage(ctx, CL_MEM_READ_WRITE, &formatPlane,
-                                      &imageDesc, 0, &error);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clCreateImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    error = clEnqueueCopyImage(cmdQueue, memObjList[i],
-                                               planeOCL, origin, origin,
-                                               regionPlane, 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueCopyImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    error = clEnqueueReadImage(cmdQueue, planeOCL, CL_TRUE,
-                                               origin, regionPlane, 0, 0,
-                                               &out[offset], 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueReadImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    offset += planeWidth * planeHeight;
-                }
-                if (!YUVCompare(surfaceFormat, out,
-                                bufferRef2[frameIdx % FRAME_NUM], width,
-                                height))
-                {
-                    log_error(
-                        "Frame idx: %i, OCL image verification after "
-                        "clEnqueueCopyImage (from shared OCL to OCL) failed\n",
-                        frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-            { // copy operation (OCL to shared OCL)
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                std::vector<cl_uchar> out(frameSize, 0);
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                    size_t pitchSize =
-                        ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)
-                             ? width
-                             : planeWidth)
-                        * sizeof(cl_uchar);
-                    cl_image_format formatPlane;
-                    formatPlane.image_channel_data_type = CL_UNORM_INT8;
-                    formatPlane.image_channel_order =
-                        (surfaceFormat == SURFACE_FORMAT_NV12 && i > 0) ? CL_RG
-                                                                        : CL_R;
-                    cl_image_desc imageDesc = { 0 };
-                    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
-                    imageDesc.image_width = planeWidth;
-                    imageDesc.image_height = planeHeight;
-                    imageDesc.image_row_pitch = pitchSize;
-                    clMemWrapper planeOCL = clCreateImage(
-                        ctx, CL_MEM_COPY_HOST_PTR, &formatPlane, &imageDesc,
-                        &bufferRef1[frameIdx % FRAME_NUM][offset], &error);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clCreateImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    error = clEnqueueCopyImage(cmdQueue, planeOCL,
-                                               memObjList[i], origin, origin,
-                                               regionPlane, 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueCopyImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE,
-                                               origin, regionPlane, 0, 0,
-                                               &out[offset], 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueReadImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    offset += planeWidth * planeHeight;
-                }
-                if (!YUVCompare(surfaceFormat, out,
-                                bufferRef1[frameIdx % FRAME_NUM], width,
-                                height))
-                {
-                    log_error(
-                        "Frame idx: %i, OCL image verification after "
-                        "clEnqueueCopyImage (from OCL to shared OCL) failed\n",
-                        frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-            { // copy from image to buffer
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                size_t bufferSize = sizeof(cl_uchar) * frameSize;
-                clMemWrapper buffer = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
-                                                     bufferSize, NULL, &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clCreateBuffer failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                    error = clEnqueueCopyImageToBuffer(
-                        cmdQueue, memObjList[i], buffer, origin, regionPlane,
-                        offset, 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueCopyImageToBuffer failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    offset += planeWidth * planeHeight * sizeof(cl_uchar);
-                }
-                std::vector<cl_uchar> out(frameSize, 0);
-                error = clEnqueueReadBuffer(cmdQueue, buffer, CL_TRUE, 0,
-                                            bufferSize, &out[0], 0, NULL, NULL);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to read buffer");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                if (!YUVCompare(surfaceFormat, out,
-                                bufferRef1[frameIdx % FRAME_NUM], width,
-                                height))
-                {
-                    log_error("Frame idx: %i, OCL buffer verification after "
-                              "clEnqueueCopyImageToBuffer (from shared OCL "
-                              "image to OCL buffer) failed\n",
-                              frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-            { // copy buffer to image
-                size_t bufferSize = sizeof(cl_uchar) * frameSize;
-                clMemWrapper buffer = clCreateBuffer(
-                    ctx, CL_MEM_COPY_HOST_PTR, bufferSize,
-                    &bufferRef2[frameIdx % FRAME_NUM][0], &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clCreateBuffer failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                std::vector<cl_uchar> out(frameSize, 0);
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                    error = clEnqueueCopyBufferToImage(
-                        cmdQueue, buffer, memObjList[i], offset, origin,
-                        regionPlane, 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueCopyBufferToImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE,
-                                               origin, regionPlane, 0, 0,
-                                               &out[offset], 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueReadImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    offset += planeWidth * planeHeight * sizeof(cl_uchar);
-                }
-                if (!YUVCompare(surfaceFormat, out,
-                                bufferRef2[frameIdx % FRAME_NUM], width,
-                                height))
-                {
-                    log_error("Frame idx: %i, OCL image verification after "
-                              "clEnqueueCopyBufferToImage (from OCL buffer to "
-                              "shared OCL image) failed\n",
-                              frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-            { // map operation to read
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                std::vector<cl_uchar> out(frameSize, 0);
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                    size_t pitchSize =
-                        ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)
-                             ? width
-                             : planeWidth);
-                    size_t rowPitch = 0;
-                    size_t slicePitch = 0;
-                    void *mapPtr = clEnqueueMapImage(
-                        cmdQueue, memObjList[i], CL_TRUE, CL_MAP_READ, origin,
-                        regionPlane, &rowPitch, &slicePitch, 0, 0, 0, &error);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueMapImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    for (size_t y = 0; y < planeHeight; ++y)
-                        memcpy(&out[offset + y * pitchSize],
-                               static_cast<cl_uchar *>(mapPtr)
-                                   + y * rowPitch / sizeof(cl_uchar),
-                               pitchSize * sizeof(cl_uchar));
-                    error = clEnqueueUnmapMemObject(cmdQueue, memObjList[i],
-                                                    mapPtr, 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueUnmapMemObject failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    offset += pitchSize * planeHeight;
-                }
-                if (!YUVCompare(surfaceFormat, out,
-                                bufferRef2[frameIdx % FRAME_NUM], width,
-                                height))
-                {
-                    log_error("Frame idx: %i, Mapped shared OCL image is "
-                              "different then expected\n",
-                              frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-            { // map operation to write
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                    size_t pitchSize =
-                        ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)
-                             ? width
-                             : planeWidth);
-                    size_t rowPitch = 0;
-                    size_t slicePitch = 0;
-                    void *mapPtr = clEnqueueMapImage(
-                        cmdQueue, memObjList[i], CL_TRUE, CL_MAP_WRITE, origin,
-                        regionPlane, &rowPitch, &slicePitch, 0, 0, 0, &error);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueMapImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    for (size_t y = 0; y < planeHeight; ++y)
-                        memcpy(static_cast<cl_uchar *>(mapPtr)
-                                   + y * rowPitch / sizeof(cl_uchar),
-                               &bufferRef3[frameIdx % FRAME_NUM]
-                                          [offset + y * pitchSize],
-                               pitchSize * sizeof(cl_uchar));
-                    error = clEnqueueUnmapMemObject(cmdQueue, memObjList[i],
-                                                    mapPtr, 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueUnmapMemObject failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    offset += pitchSize * planeHeight;
-                }
-            }
-            error = clEnqueueReleaseDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-            std::vector<cl_uchar> bufferOut(frameSize, 0);
-            if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width,
-                               height))
-            {
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            if (!YUVCompare(surfaceFormat, bufferOut,
-                            bufferRef3[frameIdx % FRAME_NUM], width, height))
-            {
-                log_error(
-                    "Frame idx: %i, media surface is different than expected\n",
-                    frameIdx);
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-    }
-    if (deviceWrapper->Status() != DEVICE_PASS)
-    {
-        std::string adapterName;
-        AdapterToString(adapterType, adapterName);
-        if (deviceWrapper->Status() == DEVICE_FAIL)
-        {
-            log_error("%s init failed\n", adapterName.c_str());
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        else
-        {
-            log_error("%s init incomplete due to unsupported device\n",
-                      adapterName.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-    return result.Result();
-int test_api(cl_device_id deviceID, cl_context context, cl_command_queue queue,
-             int num_elements)
-    CResult result;
-#if defined(_WIN32)
-    // D3D9
-    if (api_functions(deviceID, context, queue, num_elements, 10, 256, 256,
-                      CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (api_functions(deviceID, context, queue, num_elements, 3, 512, 256,
-                      CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    // D3D9EX
-    if (api_functions(deviceID, context, queue, num_elements, 5, 256, 512,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (api_functions(deviceID, context, queue, num_elements, 7, 512, 256,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (api_functions(deviceID, context, queue, num_elements, 10, 256, 256,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (api_functions(deviceID, context, queue, num_elements, 15, 128, 128,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    // DXVA
-    if (api_functions(deviceID, context, queue, num_elements, 20, 128, 128,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (api_functions(deviceID, context, queue, num_elements, 40, 64, 64,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (api_functions(deviceID, context, queue, num_elements, 5, 512, 512,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (api_functions(deviceID, context, queue, num_elements, 2, 1024, 1024,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    return result.Result();
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_kernel.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_kernel.cpp
deleted file mode 100644
index a204440..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_kernel.cpp
+++ /dev/null
@@ -1,541 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include <stdio.h>
-#include <stdlib.h>
-#include "harness/errorHelpers.h"
-#include "harness/kernelHelpers.h"
-#include "utils.h"
-int kernel_functions(cl_device_id deviceID, cl_context context,
-                     cl_command_queue queue, int num_elements,
-                     unsigned int iterationNum, unsigned int width,
-                     unsigned int height,
-                     cl_dx9_media_adapter_type_khr adapterType,
-                     TSurfaceFormat surfaceFormat,
-                     TSharedHandleType sharedHandle)
-    const unsigned int FRAME_NUM = 2;
-    const cl_uchar MAX_VALUE = 255 / 2;
-    const std::string PROGRAM_STR =
-        "__kernel void TestFunction( read_only image2d_t planeIn, write_only "
-        "image2d_t planeOut, " NL "                            sampler_t "
-        "sampler, __global int *planeRes)" NL "{" NL
-        "  int w = get_global_id(0);" NL "  int h = get_global_id(1);" NL
-        "  int width = get_image_width(planeIn);" NL
-        "  int height = get_image_height(planeOut);" NL
-        "  float4 color0 = read_imagef(planeIn, sampler, (int2)(w,h)) + "
-        "0.2f;" NL "  float4 color1 = read_imagef(planeIn, sampler, "
-        "(float2)(w,h)) + 0.2f;" NL
-        "  color0 = (color0 == color1) ? color0: (float4)(0.5, 0.5, 0.5, "
-        "0.5);" NL "  write_imagef(planeOut, (int2)(w,h), color0);" NL
-        "  if(w == 0 && h == 0)" NL "  {" NL "    planeRes[0] = width;" NL
-        "    planeRes[1] = height;" NL "  }" NL "}";
-    CResult result;
-    std::auto_ptr<CDeviceWrapper> deviceWrapper;
-    if (!DeviceCreate(adapterType, deviceWrapper))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-    std::vector<std::vector<cl_uchar>> bufferIn(FRAME_NUM);
-    std::vector<std::vector<cl_uchar>> bufferExp(FRAME_NUM);
-    size_t frameSize = width * height * 3 / 2;
-    cl_uchar step = MAX_VALUE / FRAME_NUM;
-    for (size_t i = 0; i < FRAME_NUM; ++i)
-    {
-        if (!YUVGenerate(surfaceFormat, bufferIn[i], width, height,
-                         static_cast<cl_uchar>(step * i),
-                         static_cast<cl_uchar>(step * (i + 1)))
-            || !YUVGenerate(surfaceFormat, bufferExp[i], width, height,
-                            static_cast<cl_uchar>(step * i),
-                            static_cast<cl_uchar>(step * (i + 1)), 0.2))
-        {
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-    }
-    while (deviceWrapper->AdapterNext())
-    {
-        cl_int error;
-        // check if the test can be run on the adapter
-        if (CL_SUCCESS
-            != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType,
-                                             deviceWrapper->Device(), result,
-                                             sharedHandle)))
-        {
-            return result.Result();
-        }
-        if (surfaceFormat != SURFACE_FORMAT_NV12
-            && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
-        {
-            std::string sharedHandleStr =
-                (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-            std::string formatStr;
-            std::string adapterStr;
-            SurfaceFormatToString(surfaceFormat, formatStr);
-            AdapterToString(adapterType, adapterStr);
-            log_info(
-                "Skipping test case, image format is not supported by a device "
-                "(adapter type: %s, format: %s, shared handle: %s)\n",
-                adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
-            return result.Result();
-        }
-        void *objectSrcHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surfaceSrc;
-        if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat,
-                                *deviceWrapper, surfaceSrc,
-                                (sharedHandle == SHARED_HANDLE_ENABLED) ? true
-                                                                        : false,
-                                &objectSrcHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-        void *objectDstHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surfaceDst;
-        if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat,
-                                *deviceWrapper, surfaceDst,
-                                (sharedHandle == SHARED_HANDLE_ENABLED) ? true
-                                                                        : false,
-                                &objectDstHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-        cl_context_properties contextProperties[] = {
-            (cl_context_properties)gPlatformIDdetected,
-            AdapterTypeToContextInfo(adapterType),
-            (cl_context_properties)deviceWrapper->Device(),
-            0,
-        };
-        clContextWrapper ctx = clCreateContext(
-            &contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clCreateContext failed: %s\n", IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-#if defined(_WIN32)
-        cl_dx9_surface_info_khr surfaceInfoSrc;
-        surfaceInfoSrc.resource =
-            *(static_cast<CD3D9SurfaceWrapper *>(surfaceSrc.get()));
-        surfaceInfoSrc.shared_handle = objectSrcHandle;
-        cl_dx9_surface_info_khr surfaceInfoDst;
-        surfaceInfoDst.resource =
-            *(static_cast<CD3D9SurfaceWrapper *>(surfaceDst.get()));
-        surfaceInfoDst.shared_handle = objectDstHandle;
-        void *surfaceInfoSrc = 0;
-        void *surfaceInfoDst = 0;
-        return TEST_NOT_IMPLEMENTED;
-        std::vector<cl_mem> memObjSrcList;
-        std::vector<cl_mem> memObjDstList;
-        unsigned int planesNum = PlanesNum(surfaceFormat);
-        std::vector<clMemWrapper> planeSrcList(planesNum);
-        std::vector<clMemWrapper> planeDstList(planesNum);
-        for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
-        {
-            planeSrcList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfoSrc, planeIdx,
-                &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error(
-                    "clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n",
-                    planeIdx, IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            memObjSrcList.push_back(planeSrcList[planeIdx]);
-            planeDstList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfoDst, planeIdx,
-                &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error(
-                    "clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n",
-                    planeIdx, IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            memObjDstList.push_back(planeDstList[planeIdx]);
-        }
-        clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(
-            ctx, gDeviceIDdetected, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("Unable to create command queue: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        if (!ImageInfoVerify(adapterType, memObjSrcList, width, height,
-                             surfaceSrc, objectSrcHandle))
-        {
-            log_error("Image info verification failed\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx)
-        {
-            if (!YUVSurfaceSet(surfaceFormat, surfaceSrc,
-                               bufferIn[frameIdx % FRAME_NUM], width, height))
-            {
-                result.ResultSub(CResult::TEST_ERROR);
-                return result.Result();
-            }
-            error = clEnqueueAcquireDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjSrcList.size()),
-                &memObjSrcList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            error = clEnqueueAcquireDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjDstList.size()),
-                &memObjDstList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            clSamplerWrapper sampler = clCreateSampler(
-                ctx, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error("Unable to create sampler\n");
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-            clProgramWrapper program;
-            clKernelWrapper kernel;
-            const char *progPtr = PROGRAM_STR.c_str();
-            if (create_single_kernel_helper(ctx, &program, &kernel, 1,
-                                            (const char **)&progPtr,
-                                            "TestFunction"))
-                result.ResultSub(CResult::TEST_FAIL);
-            size_t bufferSize = sizeof(cl_int) * 2;
-            clMemWrapper imageRes = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
-                                                   bufferSize, NULL, &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clCreateBuffer failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-            size_t offset = 0;
-            size_t origin[3] = { 0, 0, 0 };
-            std::vector<cl_uchar> out(frameSize, 0);
-            for (size_t i = 0; i < memObjSrcList.size(); ++i)
-            {
-                size_t planeWidth = (i == 0) ? width : width / 2;
-                size_t planeHeight = (i == 0) ? height : height / 2;
-                size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                size_t threads[2] = { planeWidth, planeHeight };
-                error = clSetKernelArg(kernel, 0, sizeof(memObjSrcList[i]),
-                                       &memObjSrcList[i]);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to set kernel arguments");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                error = clSetKernelArg(kernel, 1, sizeof(memObjDstList[i]),
-                                       &memObjDstList[i]);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to set kernel arguments");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                error = clSetKernelArg(kernel, 2, sizeof(sampler), &sampler);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to set kernel arguments");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                error = clSetKernelArg(kernel, 3, sizeof(imageRes), &imageRes);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to set kernel arguments");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                size_t localThreads[2];
-                error = get_max_common_2D_work_group_size(ctx, kernel, threads,
-                                                          localThreads);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to get work group size to use");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                error =
-                    clEnqueueNDRangeKernel(cmdQueue, kernel, 2, NULL, threads,
-                                           localThreads, 0, NULL, NULL);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to execute test kernel");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                std::vector<cl_uint> imageResOut(2, 0);
-                error = clEnqueueReadBuffer(cmdQueue, imageRes, CL_TRUE, 0,
-                                            bufferSize, &imageResOut[0], 0,
-                                            NULL, NULL);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to read buffer");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                if (imageResOut[0] != planeWidth)
-                {
-                    log_error("Invalid width value, test = %i, expected = %i\n",
-                              imageResOut[0], planeWidth);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                if (imageResOut[1] != planeHeight)
-                {
-                    log_error(
-                        "Invalid height value, test = %i, expected = %i\n",
-                        imageResOut[1], planeHeight);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                error = clEnqueueReadImage(cmdQueue, memObjDstList[i], CL_TRUE,
-                                           origin, regionPlane, 0, 0,
-                                           &out[offset], 0, 0, 0);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clEnqueueReadImage failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                offset += planeWidth * planeHeight;
-            }
-            if (!YUVCompare(surfaceFormat, out, bufferExp[frameIdx % FRAME_NUM],
-                            width, height))
-            {
-                log_error(
-                    "Frame idx: %i, OCL objects are different than expected\n",
-                    frameIdx);
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-            error = clEnqueueReleaseDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjSrcList.size()),
-                &memObjSrcList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-            error = clEnqueueReleaseDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjDstList.size()),
-                &memObjDstList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-            std::vector<cl_uchar> bufferOut(frameSize, 0);
-            if (!YUVSurfaceGet(surfaceFormat, surfaceDst, bufferOut, width,
-                               height))
-            {
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            if (!YUVCompare(surfaceFormat, bufferOut,
-                            bufferExp[frameIdx % FRAME_NUM], width, height))
-            {
-                log_error(
-                    "Frame idx: %i, media surface is different than expected\n",
-                    frameIdx);
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-    }
-    if (deviceWrapper->Status() != DEVICE_PASS)
-    {
-        std::string adapterName;
-        AdapterToString(adapterType, adapterName);
-        if (deviceWrapper->Status() == DEVICE_FAIL)
-        {
-            log_error("%s init failed\n", adapterName.c_str());
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        else
-        {
-            log_error("%s init incomplete due to unsupported device\n",
-                      adapterName.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-    return result.Result();
-int test_kernel(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, int num_elements)
-    CResult result;
-#if defined(_WIN32)
-    // D3D9
-    if (kernel_functions(deviceID, context, queue, num_elements, 10, 256, 256,
-                         CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_NV12,
-                         SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (kernel_functions(deviceID, context, queue, num_elements, 3, 256, 256,
-                         CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_YV12,
-                         SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    // D3D9EX
-    if (kernel_functions(deviceID, context, queue, num_elements, 5, 256, 512,
-                         CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12,
-                         SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (kernel_functions(deviceID, context, queue, num_elements, 7, 512, 256,
-                         CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12,
-                         SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (kernel_functions(deviceID, context, queue, num_elements, 10, 256, 256,
-                         CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12,
-                         SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (kernel_functions(deviceID, context, queue, num_elements, 15, 128, 128,
-                         CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12,
-                         SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    // DXVA
-    if (kernel_functions(deviceID, context, queue, num_elements, 20, 128, 128,
-                         CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12,
-                         SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (kernel_functions(deviceID, context, queue, num_elements, 40, 64, 64,
-                         CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12,
-                         SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (kernel_functions(deviceID, context, queue, num_elements, 5, 512, 512,
-                         CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12,
-                         SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (kernel_functions(deviceID, context, queue, num_elements, 2, 1024, 1024,
-                         CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12,
-                         SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    return result.Result();
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_get_device_ids.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_get_device_ids.cpp
deleted file mode 100644
index 613a602..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_get_device_ids.cpp
+++ /dev/null
@@ -1,220 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "utils.h"
-int get_device_ids(cl_device_id deviceID, cl_context context,
-                   cl_command_queue queue, int num_elements,
-                   cl_dx9_media_adapter_type_khr adapterType)
-    CResult result;
-    std::auto_ptr<CDeviceWrapper> deviceWrapper;
-    if (!DeviceCreate(adapterType, deviceWrapper))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-    cl_uint devicesExpectedNum = 0;
-    cl_int error = clGetDeviceIDs(gPlatformIDdetected, CL_DEVICE_TYPE_ALL, 0, 0,
-                                  &devicesExpectedNum);
-    if (error != CL_SUCCESS || devicesExpectedNum < 1)
-    {
-        log_error("clGetDeviceIDs failed: %s\n", IGetErrorString(error));
-        result.ResultSub(CResult::TEST_FAIL);
-        return result.Result();
-    }
-    std::vector<cl_device_id> devicesExpected(devicesExpectedNum);
-    error = clGetDeviceIDs(gPlatformIDdetected, CL_DEVICE_TYPE_ALL,
-                           devicesExpectedNum, &devicesExpected[0], 0);
-    if (error != CL_SUCCESS)
-    {
-        log_error("clGetDeviceIDs failed: %s\n", IGetErrorString(error));
-        result.ResultSub(CResult::TEST_FAIL);
-        return result.Result();
-    }
-    while (deviceWrapper->AdapterNext())
-    {
-        std::vector<cl_dx9_media_adapter_type_khr> mediaAdapterTypes;
-        mediaAdapterTypes.push_back(adapterType);
-        std::vector<void *> mediaDevices;
-        mediaDevices.push_back(deviceWrapper->Device());
-        // check if the test can be run on the adapter
-        if (CL_SUCCESS
-            != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType,
-                                             deviceWrapper->Device(), result)))
-        {
-            return result.Result();
-        }
-        cl_uint devicesAllNum = 0;
-        error = clGetDeviceIDsFromDX9MediaAdapterKHR(
-            gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0],
-            CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesAllNum);
-        if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND)
-        {
-            log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        std::vector<cl_device_id> devicesAll;
-        if (devicesAllNum > 0)
-        {
-            devicesAll.resize(devicesAllNum);
-            error = clGetDeviceIDsFromDX9MediaAdapterKHR(
-                gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0],
-                CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, devicesAllNum,
-                &devicesAll[0], 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-        }
-        cl_uint devicesPreferredNum = 0;
-        error = clGetDeviceIDsFromDX9MediaAdapterKHR(
-            gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0],
-            &devicesPreferredNum);
-        if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND)
-        {
-            log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        std::vector<cl_device_id> devicesPreferred;
-        if (devicesPreferredNum > 0)
-        {
-            devicesPreferred.resize(devicesPreferredNum);
-            error = clGetDeviceIDsFromDX9MediaAdapterKHR(
-                gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0],
-                devicesPreferredNum, &devicesPreferred[0], 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-        }
-        if (devicesAllNum < devicesPreferredNum)
-        {
-            log_error("Invalid number of preferred devices. It should be a "
-                      "subset of all devices\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        for (cl_uint i = 0; i < devicesPreferredNum; ++i)
-        {
-            cl_uint j = 0;
-            for (; j < devicesAllNum; ++j)
-            {
-                if (devicesPreferred[i] == devicesAll[j]) break;
-            }
-            if (j == devicesAllNum)
-            {
-                log_error("Preferred device is not a subset of all devices\n");
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-        for (cl_uint i = 0; i < devicesAllNum; ++i)
-        {
-            cl_uint j = 0;
-            for (; j < devicesExpectedNum; ++j)
-            {
-                if (devicesAll[i] == devicesExpected[j]) break;
-            }
-            if (j == devicesExpectedNum)
-            {
-                log_error("CL_ALL_DEVICES_FOR_MEDIA_ADAPTER_KHR should be a "
-                          "subset of all devices for selected platform\n");
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-    }
-    if (deviceWrapper->Status() != DEVICE_PASS)
-    {
-        std::string adapterName;
-        AdapterToString(adapterType, adapterName);
-        if (deviceWrapper->Status() == DEVICE_FAIL)
-        {
-            log_error("%s init failed\n", adapterName.c_str());
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        else
-        {
-            log_error("%s init incomplete due to unsupported device\n",
-                      adapterName.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-    return result.Result();
-int test_get_device_ids(cl_device_id deviceID, cl_context context,
-                        cl_command_queue queue, int num_elements)
-    CResult result;
-#if defined(_WIN32)
-    if (get_device_ids(deviceID, context, queue, num_elements,
-                       CL_ADAPTER_D3D9_KHR)
-        != 0)
-    {
-        log_error("\nTest case (D3D9) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (get_device_ids(deviceID, context, queue, num_elements,
-                       CL_ADAPTER_D3D9EX_KHR)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (get_device_ids(deviceID, context, queue, num_elements,
-                       CL_ADAPTER_DXVA_KHR)
-        != 0)
-    {
-        log_error("\nTest case (DXVA) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    return result.Result();
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_interop_sync.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_interop_sync.cpp
deleted file mode 100644
index fbc616e..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_interop_sync.cpp
+++ /dev/null
@@ -1,419 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "utils.h"
-int interop_user_sync(cl_device_id deviceID, cl_context context,
-                      cl_command_queue queue, int num_elements,
-                      unsigned int width, unsigned int height,
-                      TContextFuncType functionCreate,
-                      cl_dx9_media_adapter_type_khr adapterType,
-                      TSurfaceFormat surfaceFormat,
-                      TSharedHandleType sharedHandle, cl_bool userSync)
-    CResult result;
-    // create device
-    std::auto_ptr<CDeviceWrapper> deviceWrapper;
-    if (!DeviceCreate(adapterType, deviceWrapper))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-    // generate input data
-    std::vector<cl_uchar> bufferIn(width * height * 3 / 2, 0);
-    if (!YUVGenerate(surfaceFormat, bufferIn, width, height, 0, 255))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-    while (deviceWrapper->AdapterNext())
-    {
-        cl_int error;
-        // check if the test can be run on the adapter
-        if (CL_SUCCESS
-            != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType,
-                                             deviceWrapper->Device(), result,
-                                             sharedHandle)))
-        {
-            return result.Result();
-        }
-        if (surfaceFormat != SURFACE_FORMAT_NV12
-            && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
-        {
-            std::string sharedHandleStr =
-                (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-            std::string syncStr = (userSync == CL_TRUE) ? "yes" : "no";
-            std::string formatStr;
-            std::string adapterStr;
-            SurfaceFormatToString(surfaceFormat, formatStr);
-            AdapterToString(adapterType, adapterStr);
-            log_info("Skipping test case, image format is not supported by a "
-                     "device (adapter type: %s, format: %s, shared handle: %s, "
-                     "user sync: %s)\n",
-                     adapterStr.c_str(), formatStr.c_str(),
-                     sharedHandleStr.c_str(), syncStr.c_str());
-            return result.Result();
-        }
-        void *objectSharedHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surface;
-        if (!MediaSurfaceCreate(
-                adapterType, width, height, surfaceFormat, *deviceWrapper,
-                surface, (sharedHandle == SHARED_HANDLE_ENABLED) ? true : false,
-                &objectSharedHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-        cl_context_properties contextProperties[] = {
-            (cl_context_properties)gPlatformIDdetected,
-            AdapterTypeToContextInfo(adapterType),
-            (cl_context_properties)deviceWrapper->Device(),
-            userSync,
-            0,
-        };
-        clContextWrapper ctx;
-        switch (functionCreate)
-        {
-            case CONTEXT_CREATE_DEFAULT:
-                ctx = clCreateContext(&contextProperties[0], 1,
-                                      &gDeviceIDdetected, NULL, NULL, &error);
-                break;
-            case CONTEXT_CREATE_FROM_TYPE:
-                ctx = clCreateContextFromType(&contextProperties[0],
-                                              gDeviceTypeSelected, NULL, NULL,
-                                              &error);
-                break;
-            default:
-                log_error("Unknown context creation function enum\n");
-                result.ResultSub(CResult::TEST_ERROR);
-                return result.Result();
-                break;
-        }
-        if (error != CL_SUCCESS)
-        {
-            std::string functionName;
-            FunctionContextCreateToString(functionCreate, functionName);
-            log_error("%s failed: %s\n", functionName.c_str(),
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        if (!YUVSurfaceSet(surfaceFormat, surface, bufferIn, width, height))
-        {
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-#if defined(_WIN32)
-        cl_dx9_surface_info_khr surfaceInfo;
-        surfaceInfo.resource =
-            *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
-        surfaceInfo.shared_handle = objectSharedHandle;
-        void *surfaceInfo = 0;
-        return TEST_NOT_IMPLEMENTED;
-        std::vector<cl_mem> memObjList;
-        unsigned int planesNum = PlanesNum(surfaceFormat);
-        std::vector<clMemWrapper> planesList(planesNum);
-        for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
-        {
-            planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx,
-                &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error(
-                    "clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n",
-                    planeIdx, IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            memObjList.push_back(planesList[planeIdx]);
-        }
-        clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(
-            ctx, gDeviceIDdetected, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("Unable to create command queue: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        if (!ImageInfoVerify(adapterType, memObjList, width, height, surface,
-                             objectSharedHandle))
-        {
-            log_error("Image info verification failed\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        if (userSync == CL_TRUE)
-        {
-#if defined(_WIN32)
-            IDirect3DQuery9 *eventQuery = NULL;
-            switch (adapterType)
-            {
-                case CL_ADAPTER_D3D9_KHR: {
-                    LPDIRECT3DDEVICE9 device =
-                        (LPDIRECT3DDEVICE9)deviceWrapper->Device();
-                    device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery);
-                    eventQuery->Issue(D3DISSUE_END);
-                    while (S_FALSE
-                           == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH))
-                        ;
-                }
-                break;
-                case CL_ADAPTER_D3D9EX_KHR: {
-                    LPDIRECT3DDEVICE9EX device =
-                        (LPDIRECT3DDEVICE9EX)deviceWrapper->Device();
-                    device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery);
-                    eventQuery->Issue(D3DISSUE_END);
-                    while (S_FALSE
-                           == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH))
-                        ;
-                }
-                break;
-                case CL_ADAPTER_DXVA_KHR: {
-                    CDXVAWrapper *DXVADevice =
-                        dynamic_cast<CDXVAWrapper *>(&(*deviceWrapper));
-                    LPDIRECT3DDEVICE9EX device =
-                        (LPDIRECT3DDEVICE9EX)(DXVADevice->D3D9()).Device();
-                    device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery);
-                    eventQuery->Issue(D3DISSUE_END);
-                    while (S_FALSE
-                           == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH))
-                        ;
-                }
-                break;
-                default:
-                    log_error("Unknown adapter type\n");
-                    return false;
-                    break;
-            }
-            if (eventQuery)
-            {
-                eventQuery->Release();
-            }
-            return TEST_NOT_IMPLEMENTED;
-        }
-        error = clEnqueueAcquireDX9MediaSurfacesKHR(
-            cmdQueue, static_cast<cl_uint>(memObjList.size()),
-            &, 0, 0, 0);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        size_t origin[3] = { 0, 0, 0 };
-        size_t offset = 0;
-        size_t frameSize = width * height * 3 / 2;
-        std::vector<cl_uchar> out(frameSize, 0);
-        for (size_t i = 0; i < memObjList.size(); ++i)
-        {
-            size_t planeWidth = (i == 0) ? width : width / 2;
-            size_t planeHeight = (i == 0) ? height : height / 2;
-            size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-            error =
-                clEnqueueReadImage(cmdQueue,, CL_TRUE, origin,
-                                   regionPlane, 0, 0, &, 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReadImage failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-            offset += planeWidth * planeHeight;
-        }
-        if (!YUVCompare(surfaceFormat, out, bufferIn, width, height))
-        {
-            log_error("OCL object verification failed - clEnqueueReadImage\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        error = clEnqueueReleaseDX9MediaSurfacesKHR(
-            cmdQueue, static_cast<cl_uint>(memObjList.size()),
-            &, 0, 0, 0);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        if (userSync == CL_TRUE)
-        {
-            error = clFinish(cmdQueue);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clFinish failed: %s\n", IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-        // shared object verification
-        std::vector<cl_uchar> bufferOut(frameSize, 0);
-        if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height))
-        {
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        if (!YUVCompare(surfaceFormat, bufferOut, bufferIn, width, height))
-        {
-            log_error("Media surface is different than expected\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-    }
-    if (deviceWrapper->Status() != DEVICE_PASS)
-    {
-        std::string adapterName;
-        AdapterToString(adapterType, adapterName);
-        if (deviceWrapper->Status() == DEVICE_FAIL)
-        {
-            log_error("%s init failed\n", adapterName.c_str());
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        else
-        {
-            log_error("%s init incomplete due to unsupported device\n",
-                      adapterName.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-    return result.Result();
-int test_interop_user_sync(cl_device_id deviceID, cl_context context,
-                           cl_command_queue queue, int num_elements)
-    const unsigned int WIDTH = 256;
-    const unsigned int HEIGHT = 256;
-    std::vector<cl_dx9_media_adapter_type_khr> adapters;
-#if defined(_WIN32)
-    adapters.push_back(CL_ADAPTER_D3D9_KHR);
-    adapters.push_back(CL_ADAPTER_D3D9EX_KHR);
-    adapters.push_back(CL_ADAPTER_DXVA_KHR);
-    std::vector<TContextFuncType> contextFuncs;
-    contextFuncs.push_back(CONTEXT_CREATE_DEFAULT);
-    contextFuncs.push_back(CONTEXT_CREATE_FROM_TYPE);
-    std::vector<TSurfaceFormat> formats;
-    formats.push_back(SURFACE_FORMAT_NV12);
-    formats.push_back(SURFACE_FORMAT_YV12);
-    std::vector<TSharedHandleType> sharedHandleTypes;
-    sharedHandleTypes.push_back(SHARED_HANDLE_DISABLED);
-    sharedHandleTypes.push_back(SHARED_HANDLE_ENABLED);
-    std::vector<cl_bool> sync;
-    sync.push_back(CL_FALSE);
-    sync.push_back(CL_TRUE);
-    CResult result;
-    for (size_t adapterIdx = 0; adapterIdx < adapters.size(); ++adapterIdx)
-    {
-        // iteration through all create context functions
-        for (size_t contextFuncIdx = 0; contextFuncIdx < contextFuncs.size();
-             ++contextFuncIdx)
-        {
-            // iteration through YUV formats
-            for (size_t formatIdx = 0; formatIdx < formats.size(); ++formatIdx)
-            {
-                // shared handle enabled or disabled
-                for (size_t sharedHandleIdx = 0;
-                     sharedHandleIdx < sharedHandleTypes.size();
-                     ++sharedHandleIdx)
-                {
-                    // user sync interop disabled or enabled
-                    for (size_t syncIdx = 0; syncIdx < sync.size(); ++syncIdx)
-                    {
-                        if (adapters[adapterIdx] == CL_ADAPTER_D3D9_KHR
-                            && sharedHandleTypes[sharedHandleIdx]
-                                == SHARED_HANDLE_ENABLED)
-                            continue;
-                        if (interop_user_sync(
-                                deviceID, context, queue, num_elements, WIDTH,
-                                HEIGHT, contextFuncs[contextFuncIdx],
-                                adapters[adapterIdx], formats[formatIdx],
-                                sharedHandleTypes[sharedHandleIdx],
-                                sync[syncIdx])
-                            != 0)
-                        {
-                            std::string syncStr = (sync[syncIdx] == CL_TRUE)
-                                ? "user sync enabled"
-                                : "user sync disabled";
-                            std::string sharedHandle =
-                                (sharedHandleTypes[sharedHandleIdx]
-                                 == SHARED_HANDLE_ENABLED)
-                                ? "shared handle"
-                                : "no shared handle";
-                            std::string adapterStr;
-                            std::string formatStr;
-                            SurfaceFormatToString(formats[formatIdx],
-                                                  formatStr);
-                            AdapterToString(adapters[adapterIdx], adapterStr);
-                            log_error("\nTest case - clCreateContext (%s, %s, "
-                                      "%s, %s) failed\n\n",
-                                      adapterStr.c_str(), formatStr.c_str(),
-                                      sharedHandle.c_str(), syncStr.c_str());
-                            result.ResultSub(CResult::TEST_FAIL);
-                        }
-                    }
-                }
-            }
-        }
-    }
-    return result.Result();
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_memory_access.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_memory_access.cpp
deleted file mode 100644
index 1e4e2c4..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_memory_access.cpp
+++ /dev/null
@@ -1,549 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "utils.h"
-int memory_access(cl_device_id deviceID, cl_context context,
-                  cl_command_queue queue, int num_elements, unsigned int width,
-                  unsigned int height,
-                  cl_dx9_media_adapter_type_khr adapterType,
-                  TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle)
-    CResult result;
-    std::auto_ptr<CDeviceWrapper> deviceWrapper;
-    // creates device
-    if (!DeviceCreate(adapterType, deviceWrapper))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-    // generate input and expected data
-    size_t frameSize = width * height * 3 / 2;
-    std::vector<cl_uchar> bufferRef0(frameSize, 0);
-    std::vector<cl_uchar> bufferRef1(frameSize, 0);
-    std::vector<cl_uchar> bufferRef2(frameSize, 0);
-    if (!YUVGenerate(surfaceFormat, bufferRef0, width, height, 0, 90)
-        || !YUVGenerate(surfaceFormat, bufferRef1, width, height, 91, 180)
-        || !YUVGenerate(surfaceFormat, bufferRef2, width, height, 181, 255))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-    // iterates through all devices
-    while (deviceWrapper->AdapterNext())
-    {
-        cl_int error;
-        // check if the test can be run on the adapter
-        if (CL_SUCCESS
-            != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType,
-                                             deviceWrapper->Device(), result,
-                                             sharedHandle)))
-        {
-            return result.Result();
-        }
-        if (surfaceFormat != SURFACE_FORMAT_NV12
-            && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
-        {
-            std::string sharedHandleStr =
-                (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-            std::string formatStr;
-            std::string adapterStr;
-            SurfaceFormatToString(surfaceFormat, formatStr);
-            AdapterToString(adapterType, adapterStr);
-            log_info(
-                "Skipping test case, image format is not supported by a device "
-                "(adapter type: %s, format: %s, shared handle: %s)\n",
-                adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
-            return result.Result();
-        }
-        void *objectSharedHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surface;
-        // creates surface
-        if (!MediaSurfaceCreate(
-                adapterType, width, height, surfaceFormat, *deviceWrapper,
-                surface, (sharedHandle == SHARED_HANDLE_ENABLED) ? true : false,
-                &objectSharedHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-        if (!YUVSurfaceSet(surfaceFormat, surface, bufferRef0, width, height))
-        {
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-        cl_context_properties contextProperties[] = {
-            (cl_context_properties)gPlatformIDdetected,
-            AdapterTypeToContextInfo(adapterType),
-            (cl_context_properties)deviceWrapper->Device(),
-            0,
-        };
-        clContextWrapper ctx = clCreateContext(
-            &contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clCreateContext failed: %s\n", IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(
-            ctx, gDeviceIDdetected, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("Unable to create command queue: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        { // memory access write
-#if defined(_WIN32)
-            cl_dx9_surface_info_khr surfaceInfo;
-            surfaceInfo.resource =
-                *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
-            surfaceInfo.shared_handle = objectSharedHandle;
-            void *surfaceInfo = 0;
-            return TEST_NOT_IMPLEMENTED;
-            std::vector<cl_mem> memObjList;
-            unsigned int planesNum = PlanesNum(surfaceFormat);
-            std::vector<clMemWrapper> planesList(planesNum);
-            for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
-            {
-                planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                    ctx, CL_MEM_WRITE_ONLY, adapterType, &surfaceInfo, planeIdx,
-                    &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clCreateFromDX9MediaSurfaceKHR failed for "
-                              "WRITE_ONLY plane %i: %s\n",
-                              planeIdx, IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                    return result.Result();
-                }
-                memObjList.push_back(planesList[planeIdx]);
-            }
-            error = clEnqueueAcquireDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            size_t offset = 0;
-            size_t origin[3] = { 0, 0, 0 };
-            for (size_t i = 0; i < memObjList.size(); ++i)
-            {
-                size_t planeWidth = (i == 0) ? width : width / 2;
-                size_t planeHeight = (i == 0) ? height : height / 2;
-                size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                error = clEnqueueWriteImage(cmdQueue, memObjList[i], CL_TRUE,
-                                            origin, regionPlane, 0, 0,
-                                            &bufferRef1[offset], 0, 0, 0);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clEnqueueWriteImage failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                offset += planeWidth * planeHeight;
-            }
-            error = clEnqueueReleaseDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-        std::vector<cl_uchar> bufferOut0(frameSize, 0);
-        if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut0, width, height))
-        {
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        if (!YUVCompare(surfaceFormat, bufferOut0, bufferRef1, width, height))
-        {
-            log_error("Media surface is different than expected\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        { // memory access read
-#if defined(_WIN32)
-            cl_dx9_surface_info_khr surfaceInfo;
-            surfaceInfo.resource =
-                *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
-            surfaceInfo.shared_handle = objectSharedHandle;
-            void *surfaceInfo = 0;
-            return TEST_NOT_IMPLEMENTED;
-            std::vector<cl_mem> memObjList;
-            unsigned int planesNum = PlanesNum(surfaceFormat);
-            std::vector<clMemWrapper> planesList(planesNum);
-            for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
-            {
-                planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                    ctx, CL_MEM_READ_ONLY, adapterType, &surfaceInfo, planeIdx,
-                    &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clCreateFromDX9MediaSurfaceKHR failed for "
-                              "READ_ONLY plane %i: %s\n",
-                              planeIdx, IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                    return result.Result();
-                }
-                memObjList.push_back(planesList[planeIdx]);
-            }
-            error = clEnqueueAcquireDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            std::vector<cl_uchar> out(frameSize, 0);
-            size_t offset = 0;
-            size_t origin[3] = { 0, 0, 0 };
-            for (size_t i = 0; i < memObjList.size(); ++i)
-            {
-                size_t planeWidth = (i == 0) ? width : width / 2;
-                size_t planeHeight = (i == 0) ? height : height / 2;
-                size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE,
-                                           origin, regionPlane, 0, 0,
-                                           &out[offset], 0, 0, 0);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clEnqueueReadImage failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                offset += planeWidth * planeHeight;
-            }
-            if (!YUVCompare(surfaceFormat, out, bufferRef1, width, height))
-            {
-                log_error("OCL image (READ_ONLY) is different then expected\n");
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-            error = clEnqueueReleaseDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-        std::vector<cl_uchar> bufferOut1(frameSize, 0);
-        if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut1, width, height))
-        {
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        if (!YUVCompare(surfaceFormat, bufferOut1, bufferRef1, width, height))
-        {
-            log_error("Media surface is different than expected\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        { // memory access read write
-#if defined(_WIN32)
-            cl_dx9_surface_info_khr surfaceInfo;
-            surfaceInfo.resource =
-                *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
-            surfaceInfo.shared_handle = objectSharedHandle;
-            void *surfaceInfo = 0;
-            return TEST_NOT_IMPLEMENTED;
-            std::vector<cl_mem> memObjList;
-            unsigned int planesNum = PlanesNum(surfaceFormat);
-            std::vector<clMemWrapper> planesList(planesNum);
-            for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
-            {
-                planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(
-                    ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx,
-                    &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clCreateFromDX9MediaSurfaceKHR failed for "
-                              "READ_WRITE plane %i: %s\n",
-                              planeIdx, IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                    return result.Result();
-                }
-                memObjList.push_back(planesList[planeIdx]);
-            }
-            error = clEnqueueAcquireDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            { // read
-                std::vector<cl_uchar> out(frameSize, 0);
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                    error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE,
-                                               origin, regionPlane, 0, 0,
-                                               &out[offset], 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueReadImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    offset += planeWidth * planeHeight;
-                }
-                if (!YUVCompare(surfaceFormat, out, bufferRef1, width, height))
-                {
-                    log_error(
-                        "OCL image (READ_WRITE) is different then expected\n");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-            { // write
-                size_t offset = 0;
-                size_t origin[3] = { 0, 0, 0 };
-                for (size_t i = 0; i < memObjList.size(); ++i)
-                {
-                    size_t planeWidth = (i == 0) ? width : width / 2;
-                    size_t planeHeight = (i == 0) ? height : height / 2;
-                    size_t regionPlane[3] = { planeWidth, planeHeight, 1 };
-                    error = clEnqueueWriteImage(
-                        cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane,
-                        0, 0, &bufferRef2[offset], 0, 0, 0);
-                    if (error != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueWriteImage failed: %s\n",
-                                  IGetErrorString(error));
-                        result.ResultSub(CResult::TEST_FAIL);
-                    }
-                    offset += planeWidth * planeHeight;
-                }
-            }
-            error = clEnqueueReleaseDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-        std::vector<cl_uchar> bufferOut2(frameSize, 0);
-        if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut2, width, height))
-        {
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        if (!YUVCompare(surfaceFormat, bufferOut2, bufferRef2, width, height))
-        {
-            log_error("Media surface is different than expected\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-    }
-    if (deviceWrapper->Status() != DEVICE_PASS)
-    {
-        std::string adapterName;
-        AdapterToString(adapterType, adapterName);
-        if (deviceWrapper->Status() == DEVICE_FAIL)
-        {
-            log_error("%s init failed\n", adapterName.c_str());
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        else
-        {
-            log_error("%s init incomplete due to unsupported device\n",
-                      adapterName.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-    return result.Result();
-int test_memory_access(cl_device_id deviceID, cl_context context,
-                       cl_command_queue queue, int num_elements)
-    CResult result;
-#if defined(_WIN32)
-    // D3D9
-    if (memory_access(deviceID, context, queue, num_elements, 256, 256,
-                      CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (memory_access(deviceID, context, queue, num_elements, 512, 256,
-                      CL_ADAPTER_D3D9_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    // D3D9EX
-    if (memory_access(deviceID, context, queue, num_elements, 256, 512,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (memory_access(deviceID, context, queue, num_elements, 512, 256,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (memory_access(deviceID, context, queue, num_elements, 256, 256,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (memory_access(deviceID, context, queue, num_elements, 128, 128,
-                      CL_ADAPTER_D3D9EX_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    // DXVA
-    if (memory_access(deviceID, context, queue, num_elements, 128, 128,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (memory_access(deviceID, context, queue, num_elements, 64, 64,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_NV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (memory_access(deviceID, context, queue, num_elements, 512, 512,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (memory_access(deviceID, context, queue, num_elements, 1024, 1024,
-                      CL_ADAPTER_DXVA_KHR, SURFACE_FORMAT_YV12,
-                      SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    return result.Result();
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_other_data_types.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_other_data_types.cpp
deleted file mode 100644
index 0e5d1d1..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_other_data_types.cpp
+++ /dev/null
@@ -1,1319 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include <stdio.h>
-#include <stdlib.h>
-#include "harness/errorHelpers.h"
-#include "harness/imageHelpers.h"
-#include "harness/kernelHelpers.h"
-#include "utils.h"
-template <typename T>
-int other_data_types(cl_device_id deviceID, cl_context context,
-                     cl_command_queue queue, int num_elements,
-                     unsigned int iterationNum, unsigned int width,
-                     unsigned int height,
-                     cl_dx9_media_adapter_type_khr adapterType,
-                     TSurfaceFormat surfaceFormat,
-                     TSharedHandleType sharedHandle)
-    const unsigned int FRAME_NUM = 2;
-    const float MAX_VALUE = 0.6f;
-    const std::string PROGRAM_STR =
-        "__kernel void TestFunction( read_only image2d_t imageIn, write_only "
-        "image2d_t imageOut, " NL "                            sampler_t "
-        "sampler, __global int *imageRes)" NL "{" NL
-        "  int w = get_global_id(0);" NL "  int h = get_global_id(1);" NL
-        "  int width = get_image_width(imageIn);" NL
-        "  int height = get_image_height(imageOut);" NL
-        "  float4 color0 = read_imagef(imageIn, sampler, (int2)(w,h)) - "
-        "0.2f;" NL "  float4 color1 = read_imagef(imageIn, sampler, "
-        "(float2)(w,h)) - 0.2f;" NL
-        "  color0 = (color0 == color1) ? color0: (float4)(0.5, 0.5, 0.5, "
-        "0.5);" NL "  write_imagef(imageOut, (int2)(w,h), color0);" NL
-        "  if(w == 0 && h == 0)" NL "  {" NL "    imageRes[0] = width;" NL
-        "    imageRes[1] = height;" NL "  }" NL "}";
-    CResult result;
-    cl_image_format format;
-    if (!SurfaceFormatToOCL(surfaceFormat, format))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-    std::auto_ptr<CDeviceWrapper> deviceWrapper;
-    if (!DeviceCreate(adapterType, deviceWrapper))
-    {
-        result.ResultSub(CResult::TEST_ERROR);
-        return result.Result();
-    }
-    while (deviceWrapper->AdapterNext())
-    {
-        cl_int error;
-        // check if the test can be run on the adapter
-        if (CL_SUCCESS
-            != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType,
-                                             deviceWrapper->Device(), result,
-                                             sharedHandle)))
-        {
-            return result.Result();
-        }
-        cl_context_properties contextProperties[] = {
-            (cl_context_properties)gPlatformIDdetected,
-            AdapterTypeToContextInfo(adapterType),
-            (cl_context_properties)deviceWrapper->Device(),
-            0,
-        };
-        clContextWrapper ctx = clCreateContext(
-            &contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clCreateContext failed: %s\n", IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(
-            ctx, gDeviceIDdetected, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("Unable to create command queue: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        if (!SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
-        {
-            std::string sharedHandleStr =
-                (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-            std::string formatStr;
-            std::string adapterStr;
-            SurfaceFormatToString(surfaceFormat, formatStr);
-            AdapterToString(adapterType, adapterStr);
-            log_info(
-                "Skipping test case, image format is not supported by a device "
-                "(adapter type: %s, format: %s, shared handle: %s)\n",
-                adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
-            return result.Result();
-        }
-        if (!ImageFormatCheck(ctx, CL_MEM_OBJECT_IMAGE2D, format))
-        {
-            std::string sharedHandleStr =
-                (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-            std::string formatStr;
-            std::string adapterStr;
-            SurfaceFormatToString(surfaceFormat, formatStr);
-            AdapterToString(adapterType, adapterStr);
-            log_info("Skipping test case, image format is not supported by OCL "
-                     "(adapter type: %s, format: %s, shared handle: %s)\n",
-                     adapterStr.c_str(), formatStr.c_str(),
-                     sharedHandleStr.c_str());
-            return result.Result();
-        }
-        if (format.image_channel_data_type == CL_HALF_FLOAT)
-        {
-            if (DetectFloatToHalfRoundingMode(cmdQueue))
-            {
-                log_error("Unable to detect rounding mode\n");
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-        }
-        std::vector<std::vector<T>> bufferIn(FRAME_NUM);
-        std::vector<std::vector<T>> bufferExp(FRAME_NUM);
-        float step = MAX_VALUE / static_cast<float>(FRAME_NUM);
-        unsigned int planeNum = ChannelNum(surfaceFormat);
-        for (size_t i = 0; i < FRAME_NUM; ++i)
-        {
-            DataGenerate(surfaceFormat, format.image_channel_data_type,
-                         bufferIn[i], width, height, planeNum, step * i,
-                         step * (i + 1));
-            DataGenerate(surfaceFormat, format.image_channel_data_type,
-                         bufferExp[i], width, height, planeNum, step * i,
-                         step * (i + 1), 0.2f);
-        }
-        void *objectSrcHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surfaceSrc;
-        if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat,
-                                *deviceWrapper, surfaceSrc,
-                                (sharedHandle == SHARED_HANDLE_ENABLED) ? true
-                                                                        : false,
-                                &objectSrcHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-        void *objectDstHandle = 0;
-        std::auto_ptr<CSurfaceWrapper> surfaceDst;
-        if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat,
-                                *deviceWrapper, surfaceDst,
-                                (sharedHandle == SHARED_HANDLE_ENABLED) ? true
-                                                                        : false,
-                                &objectDstHandle))
-        {
-            log_error("Media surface creation failed for %i adapter\n",
-                      deviceWrapper->AdapterIdx());
-            result.ResultSub(CResult::TEST_ERROR);
-            return result.Result();
-        }
-#if defined(_WIN32)
-        cl_dx9_surface_info_khr surfaceSrcInfo;
-        CD3D9SurfaceWrapper *dx9SurfaceSrc =
-            (static_cast<CD3D9SurfaceWrapper *>(surfaceSrc.get()));
-        surfaceSrcInfo.resource = *dx9SurfaceSrc;
-        surfaceSrcInfo.shared_handle = objectSrcHandle;
-        cl_dx9_surface_info_khr surfaceDstInfo;
-        CD3D9SurfaceWrapper *dx9SurfaceDst =
-            (static_cast<CD3D9SurfaceWrapper *>(surfaceDst.get()));
-        surfaceDstInfo.resource = *dx9SurfaceDst;
-        surfaceDstInfo.shared_handle = objectDstHandle;
-        void *surfaceSrcInfo = 0;
-        void *surfaceDstInfo = 0;
-        return TEST_NOT_IMPLEMENTED;
-        // create OCL shared object
-        clMemWrapper objectSrcShared = clCreateFromDX9MediaSurfaceKHR(
-            ctx, CL_MEM_READ_WRITE, adapterType, &surfaceSrcInfo, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clCreateFromDX9MediaSurfaceKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        clMemWrapper objectDstShared = clCreateFromDX9MediaSurfaceKHR(
-            ctx, CL_MEM_READ_WRITE, adapterType, &surfaceDstInfo, 0, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("clCreateFromDX9MediaSurfaceKHR failed: %s\n",
-                      IGetErrorString(error));
-            result.ResultSub(CResult::TEST_FAIL);
-            return result.Result();
-        }
-        std::vector<cl_mem> memObjList;
-        memObjList.push_back(objectSrcShared);
-        memObjList.push_back(objectDstShared);
-        if (!GetMemObjInfo(objectSrcShared, adapterType, surfaceSrc,
-                           objectSrcHandle))
-        {
-            log_error("Invalid memory object info\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        if (!GetImageInfo(objectSrcShared, format, sizeof(T) * planeNum,
-                          width * sizeof(T) * planeNum, 0, width, height, 0, 0))
-        {
-            log_error("clGetImageInfo failed\n");
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx)
-        {
-            // surface set
-#if defined(_WIN32)
-            D3DLOCKED_RECT rect;
-            if (FAILED((*dx9SurfaceSrc)->LockRect(&rect, NULL, 0)))
-            {
-                log_error("Surface lock failed\n");
-                result.ResultSub(CResult::TEST_ERROR);
-                return result.Result();
-            }
-            size_t pitch = rect.Pitch / sizeof(T);
-            size_t lineSize = width * planeNum * sizeof(T);
-            T *ptr = static_cast<T *>(rect.pBits);
-            for (size_t y = 0; y < height; ++y)
-                memcpy(ptr + y * pitch,
-                       &bufferIn[frameIdx % FRAME_NUM][y * width * planeNum],
-                       lineSize);
-            (*dx9SurfaceSrc)->UnlockRect();
-            void *surfaceInfo = 0;
-            return TEST_NOT_IMPLEMENTED;
-            error = clEnqueueAcquireDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueAcquireMediaSurfaceKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-                return result.Result();
-            }
-            size_t origin[3] = { 0, 0, 0 };
-            size_t region[3] = { width, height, 1 };
-            { // read operation
-                std::vector<T> out(planeNum * width * height, 0);
-                error =
-                    clEnqueueReadImage(cmdQueue, objectSrcShared, CL_TRUE,
-                                       origin, region, 0, 0, &out[0], 0, 0, 0);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clEnqueueReadImage failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                if (!DataCompare(surfaceFormat, format.image_channel_data_type,
-                                 out, bufferIn[frameIdx % FRAME_NUM], width,
-                                 height, planeNum))
-                {
-                    log_error("Frame idx: %i, OCL object is different then "
-                              "expected\n",
-                              frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-            { // write operation
-                error = clEnqueueWriteImage(
-                    cmdQueue, objectSrcShared, CL_TRUE, origin, region, 0, 0,
-                    &bufferExp[frameIdx % FRAME_NUM][0], 0, 0, 0);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clEnqueueWriteImage failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-            { // kernel operations
-                clSamplerWrapper sampler = clCreateSampler(
-                    ctx, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to create sampler\n");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                size_t threads[2] = { width, height };
-                clProgramWrapper program;
-                clKernelWrapper kernel;
-                const char *progPtr = PROGRAM_STR.c_str();
-                if (create_single_kernel_helper(ctx, &program, &kernel, 1,
-                                                (const char **)&progPtr,
-                                                "TestFunction"))
-                    result.ResultSub(CResult::TEST_FAIL);
-                error = clSetKernelArg(kernel, 0, sizeof(objectSrcShared),
-                                       &(objectSrcShared));
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to set kernel arguments");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                error = clSetKernelArg(kernel, 1, sizeof(objectDstShared),
-                                       &(objectDstShared));
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to set kernel arguments");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                error = clSetKernelArg(kernel, 2, sizeof(sampler), &sampler);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to set kernel arguments");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                size_t bufferSize = sizeof(cl_int) * 2;
-                clMemWrapper imageRes = clCreateBuffer(
-                    ctx, CL_MEM_READ_WRITE, bufferSize, NULL, &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clCreateBuffer failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                error = clSetKernelArg(kernel, 3, sizeof(imageRes), &imageRes);
-                size_t localThreads[2];
-                error = get_max_common_2D_work_group_size(ctx, kernel, threads,
-                                                          localThreads);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to get work group size to use");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                error =
-                    clEnqueueNDRangeKernel(cmdQueue, kernel, 2, NULL, threads,
-                                           localThreads, 0, NULL, NULL);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to execute test kernel");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                std::vector<cl_uint> imageResOut(2, 0);
-                error = clEnqueueReadBuffer(cmdQueue, imageRes, CL_TRUE, 0,
-                                            bufferSize, &imageResOut[0], 0,
-                                            NULL, NULL);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("Unable to read buffer");
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                if (imageResOut[0] != width)
-                {
-                    log_error("Invalid width value, test = %i, expected = %i\n",
-                              imageResOut[0], width);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                if (imageResOut[1] != height)
-                {
-                    log_error(
-                        "Invalid height value, test = %i, expected = %i\n",
-                        imageResOut[1], height);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-            { // map operation
-                size_t mapOrigin[3] = { 0, 0, 0 };
-                size_t mapRegion[3] = { width, height, 1 };
-                std::vector<T> out(width * height * planeNum, 0);
-                size_t rowPitch = 0;
-                size_t slicePitch = 0;
-                void *mapPtr = clEnqueueMapImage(
-                    cmdQueue, objectDstShared, CL_TRUE,
-                    CL_MAP_READ | CL_MAP_WRITE, mapOrigin, mapRegion, &rowPitch,
-                    &slicePitch, 0, 0, 0, &error);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clEnqueueMapImage failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                for (size_t y = 0; y < height; ++y)
-                    memcpy(&out[y * width * planeNum],
-                           static_cast<T *>(mapPtr) + y * rowPitch / sizeof(T),
-                           width * planeNum * sizeof(T));
-                if (!DataCompare(surfaceFormat, format.image_channel_data_type,
-                                 out, bufferIn[frameIdx % FRAME_NUM], width,
-                                 height, planeNum))
-                {
-                    log_error("Frame idx: %i, Mapped OCL object is different "
-                              "then expected\n",
-                              frameIdx);
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-                for (size_t y = 0; y < height; ++y)
-                    memcpy(
-                        static_cast<T *>(mapPtr) + y * rowPitch / sizeof(T),
-                        &bufferExp[frameIdx % FRAME_NUM][y * width * planeNum],
-                        width * planeNum * sizeof(T));
-                error = clEnqueueUnmapMemObject(cmdQueue, objectDstShared,
-                                                mapPtr, 0, 0, 0);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("clEnqueueUnmapMemObject failed: %s\n",
-                              IGetErrorString(error));
-                    result.ResultSub(CResult::TEST_FAIL);
-                }
-            }
-            error = clEnqueueReleaseDX9MediaSurfacesKHR(
-                cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
-            if (error != CL_SUCCESS)
-            {
-                log_error("clEnqueueReleaseMediaSurfaceKHR failed: %s\n",
-                          IGetErrorString(error));
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-            std::vector<T> out(width * height * planeNum, 0);
-            // surface get
-#if defined(_WIN32)
-            if (FAILED((*dx9SurfaceDst)->LockRect(&rect, NULL, 0)))
-            {
-                log_error("Surface lock failed\n");
-                result.ResultSub(CResult::TEST_ERROR);
-                return result.Result();
-            }
-            pitch = rect.Pitch / sizeof(T);
-            lineSize = width * planeNum * sizeof(T);
-            ptr = static_cast<T *>(rect.pBits);
-            for (size_t y = 0; y < height; ++y)
-                memcpy(&out[y * width * planeNum], ptr + y * pitch, lineSize);
-            (*dx9SurfaceDst)->UnlockRect();
-            return TEST_NOT_IMPLEMENTED;
-            if (!DataCompare(surfaceFormat, format.image_channel_data_type, out,
-                             bufferExp[frameIdx % FRAME_NUM], width, height,
-                             planeNum))
-            {
-                log_error(
-                    "Frame idx: %i, media object is different then expected\n",
-                    frameIdx);
-                result.ResultSub(CResult::TEST_FAIL);
-            }
-        }
-    }
-    if (deviceWrapper->Status() != DEVICE_PASS)
-    {
-        std::string adapterName;
-        AdapterToString(adapterType, adapterName);
-        if (deviceWrapper->Status() == DEVICE_FAIL)
-        {
-            log_error("%s init failed\n", adapterName.c_str());
-            result.ResultSub(CResult::TEST_FAIL);
-        }
-        else
-        {
-            log_error("%s init incomplete due to unsupported device\n",
-                      adapterName.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-    return result.Result();
-int test_other_data_types(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, int num_elements)
-    CResult result;
-#if defined(_WIN32)
-    // D3D9
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   64, 256, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  256, 128, CL_ADAPTER_D3D9_KHR,
-                                  SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    512, 256, CL_ADAPTER_D3D9_KHR,
-                                    SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, L16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 512, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, A8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   1024, 32, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, L8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_float>(
-            deviceID, context, queue, num_elements, 10, 32, 1024,
-        != 0)
-    {
-        log_error("\nTest case (D3D9, G32R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_half>(
-            deviceID, context, queue, num_elements, 10, 64, 64,
-        != 0)
-    {
-        log_error("\nTest case (D3D9, G16R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_ushort>(
-            deviceID, context, queue, num_elements, 10, 256, 256,
-        != 0)
-    {
-        log_error("\nTest case (D3D9, G16R16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 128, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, A8L8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   128, 512, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_A32B32G32R32F,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9, A32B32G32R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  128, 128, CL_ADAPTER_D3D9_KHR,
-                                  SURFACE_FORMAT_A16B16G16R16F,
-                                  SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9, A16B16G16R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    64, 128, CL_ADAPTER_D3D9_KHR,
-                                    SURFACE_FORMAT_A16B16G16R16,
-                                    SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9, A16B16G16R16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   128, 64, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_A8B8G8R8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, A8B8G8R8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   16, 512, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_X8B8G8R8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, X8B8G8R8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 16, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_A8R8G8B8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, A8R8G8B8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 256, CL_ADAPTER_D3D9_KHR,
-                                   SURFACE_FORMAT_X8R8G8B8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9, X8R8G8B8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    // D3D9EX
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   64, 256, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   64, 256, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_R32F, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, R32F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  256, 128, CL_ADAPTER_D3D9EX_KHR,
-                                  SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  256, 128, CL_ADAPTER_D3D9EX_KHR,
-                                  SURFACE_FORMAT_R16F, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, R16F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    512, 256, CL_ADAPTER_D3D9EX_KHR,
-                                    SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, L16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    512, 256, CL_ADAPTER_D3D9EX_KHR,
-                                    SURFACE_FORMAT_L16, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, L16, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 512, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, A8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 512, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, A8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   1024, 32, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, L8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   1024, 32, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_L8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, L8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   32, 1024, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_G32R32F,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, G32R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   32, 1024, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_G32R32F,
-                                   SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, G32R32F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  64, 64, CL_ADAPTER_D3D9EX_KHR,
-                                  SURFACE_FORMAT_G16R16F,
-                                  SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, G16R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  64, 64, CL_ADAPTER_D3D9EX_KHR,
-                                  SURFACE_FORMAT_G16R16F, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, G16R16F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    256, 256, CL_ADAPTER_D3D9EX_KHR,
-                                    SURFACE_FORMAT_G16R16,
-                                    SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, G16R16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_ushort>(
-            deviceID, context, queue, num_elements, 10, 256, 256,
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, G16R16, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 128, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, A8L8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 128, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8L8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, A8L8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   128, 512, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A32B32G32R32F,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A32B32G32R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   128, 512, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A32B32G32R32F,
-                                   SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A32B32G32R32F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  128, 128, CL_ADAPTER_D3D9EX_KHR,
-                                  SURFACE_FORMAT_A16B16G16R16F,
-                                  SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A16B16G16R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  128, 128, CL_ADAPTER_D3D9EX_KHR,
-                                  SURFACE_FORMAT_A16B16G16R16F,
-                                  SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A16B16G16R16F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    64, 128, CL_ADAPTER_D3D9EX_KHR,
-                                    SURFACE_FORMAT_A16B16G16R16,
-                                    SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A16B16G16R16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    64, 128, CL_ADAPTER_D3D9EX_KHR,
-                                    SURFACE_FORMAT_A16B16G16R16,
-                                    SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A16B16G16R16, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   128, 64, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8B8G8R8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A8B8G8R8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   128, 64, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8B8G8R8,
-                                   SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, A8B8G8R8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   16, 512, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_X8B8G8R8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, X8B8G8R8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   16, 512, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_X8B8G8R8,
-                                   SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, X8B8G8R8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 16, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8R8G8B8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, A8R8G8B8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 16, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_A8R8G8B8,
-                                   SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, A8R8G8B8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 256, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_X8R8G8B8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (D3D9EX, X8R8G8B8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 256, CL_ADAPTER_D3D9EX_KHR,
-                                   SURFACE_FORMAT_X8R8G8B8,
-                                   SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (D3D9EX, X8R8G8B8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    // DXVA
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   64, 256, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   64, 256, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_R32F, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, R32F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  256, 128, CL_ADAPTER_DXVA_KHR,
-                                  SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  256, 128, CL_ADAPTER_DXVA_KHR,
-                                  SURFACE_FORMAT_R16F, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, R16F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    512, 256, CL_ADAPTER_DXVA_KHR,
-                                    SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, L16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    512, 256, CL_ADAPTER_DXVA_KHR,
-                                    SURFACE_FORMAT_L16, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, L16, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 512, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 512, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   1024, 32, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, L8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   1024, 32, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_L8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, L8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_float>(
-            deviceID, context, queue, num_elements, 10, 32, 1024,
-        != 0)
-    {
-        log_error("\nTest case (DXVA, G32R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_float>(
-            deviceID, context, queue, num_elements, 10, 32, 1024,
-        != 0)
-    {
-        log_error("\nTest case (DXVA, G32R32F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_half>(
-            deviceID, context, queue, num_elements, 10, 64, 64,
-        != 0)
-    {
-        log_error("\nTest case (DXVA, G16R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  64, 64, CL_ADAPTER_DXVA_KHR,
-                                  SURFACE_FORMAT_G16R16F, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, G16R16F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_ushort>(
-            deviceID, context, queue, num_elements, 10, 256, 256,
-        != 0)
-    {
-        log_error("\nTest case (DXVA, G16R16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_ushort>(
-            deviceID, context, queue, num_elements, 10, 256, 256,
-        != 0)
-    {
-        log_error("\nTest case (DXVA, G16R16, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 128, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8L8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 128, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A8L8, SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8L8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   128, 512, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A32B32G32R32F,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (DXVA, A32B32G32R32F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_float>(deviceID, context, queue, num_elements, 10,
-                                   128, 512, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A32B32G32R32F,
-                                   SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (DXVA, A32B32G32R32F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  128, 128, CL_ADAPTER_DXVA_KHR,
-                                  SURFACE_FORMAT_A16B16G16R16F,
-                                  SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (DXVA, A16B16G16R16F, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_half>(deviceID, context, queue, num_elements, 10,
-                                  128, 128, CL_ADAPTER_DXVA_KHR,
-                                  SURFACE_FORMAT_A16B16G16R16F,
-                                  SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (DXVA, A16B16G16R16F, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    64, 128, CL_ADAPTER_DXVA_KHR,
-                                    SURFACE_FORMAT_A16B16G16R16,
-                                    SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error(
-            "\nTest case (DXVA, A16B16G16R16, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10,
-                                    64, 128, CL_ADAPTER_DXVA_KHR,
-                                    SURFACE_FORMAT_A16B16G16R16,
-                                    SHARED_HANDLE_ENABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A16B16G16R16, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   128, 64, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A8B8G8R8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8B8G8R8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(
-            deviceID, context, queue, num_elements, 10, 128, 64,
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8B8G8R8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   16, 512, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_X8B8G8R8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, X8B8G8R8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(
-            deviceID, context, queue, num_elements, 10, 16, 512,
-        != 0)
-    {
-        log_error("\nTest case (DXVA, X8B8G8R8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   512, 16, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_A8R8G8B8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8R8G8B8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(
-            deviceID, context, queue, num_elements, 10, 512, 16,
-        != 0)
-    {
-        log_error("\nTest case (DXVA, A8R8G8B8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10,
-                                   256, 256, CL_ADAPTER_DXVA_KHR,
-                                   SURFACE_FORMAT_X8R8G8B8,
-                                   SHARED_HANDLE_DISABLED)
-        != 0)
-    {
-        log_error("\nTest case (DXVA, X8R8G8B8, no shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    if (other_data_types<cl_uchar>(
-            deviceID, context, queue, num_elements, 10, 256, 256,
-        != 0)
-    {
-        log_error("\nTest case (DXVA, X8R8G8B8, shared handle) failed\n\n");
-        result.ResultSub(CResult::TEST_FAIL);
-    }
-    return result.Result();
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.cpp
deleted file mode 100644
index 87eb13c..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.cpp
+++ /dev/null
@@ -1,1664 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "utils.h"
-#include "harness/errorHelpers.h"
-#include "harness/imageHelpers.h"
-#include "harness/rounding_mode.h"
-#include <math.h>
-#include <CL/cl_half.h>
-static RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode;
-CResult::CResult(): _result(TEST_PASS), _resultLast(TEST_NORESULT) {}
-CResult::~CResult() {}
-CResult::TTestResult CResult::ResultLast() const { return _resultLast; }
-int CResult::Result() const
-    switch (_result)
-    {
-        case TEST_NORESULT:
-        case TEST_NOTSUPPORTED:
-        case TEST_PASS: return 0; break;
-        case TEST_FAIL: return 1; break;
-        case TEST_ERROR: return 2; break;
-        default: return -1; break;
-    }
-void CResult::ResultSub(TTestResult result)
-    _resultLast = result;
-    if (static_cast<int>(result) > static_cast<int>(_result)) _result = result;
-void FunctionContextCreateToString(TContextFuncType contextCreateFunction,
-                                   std::string &contextFunction)
-    switch (contextCreateFunction)
-    {
-        case CONTEXT_CREATE_DEFAULT: contextFunction = "CreateContext"; break;
-            contextFunction = "CreateContextFromType";
-            break;
-        default:
-            contextFunction = "Unknown";
-            log_error("FunctionContextCreateToString(): Unknown create "
-                      "function enum!");
-            break;
-    }
-void AdapterToString(cl_dx9_media_adapter_type_khr adapterType,
-                     std::string &adapter)
-    switch (adapterType)
-    {
-        case CL_ADAPTER_D3D9_KHR: adapter = "D3D9"; break;
-        case CL_ADAPTER_D3D9EX_KHR: adapter = "D3D9EX"; break;
-        case CL_ADAPTER_DXVA_KHR: adapter = "DXVA"; break;
-        default:
-            adapter = "Unknown";
-            log_error("AdapterToString(): Unknown adapter type!");
-            break;
-    }
-AdapterTypeToContextInfo(cl_dx9_media_adapter_type_khr adapterType)
-    switch (adapterType)
-    {
-        case CL_ADAPTER_D3D9_KHR: return CL_CONTEXT_ADAPTER_D3D9_KHR; break;
-        case CL_ADAPTER_D3D9EX_KHR: return CL_CONTEXT_ADAPTER_D3D9EX_KHR; break;
-        default:
-            log_error("AdapterTypeToContextInfo(): Unknown adapter type!");
-            return 0;
-            break;
-    }
-void YUVGenerateNV12(std::vector<cl_uchar> &yuv, unsigned int width,
-                     unsigned int height, cl_uchar valueMin, cl_uchar valueMax,
-                     double valueAdd)
-    yuv.clear();
-    yuv.resize(width * height * 3 / 2, 0);
-    double min = static_cast<double>(valueMin);
-    double max = static_cast<double>(valueMax);
-    double range = 255;
-    double add = static_cast<double>(valueAdd * range);
-    double stepX = (max - min) / static_cast<double>(width);
-    double stepY = (max - min) / static_cast<double>(height);
-    // generate Y plane
-    for (unsigned int i = 0; i < height; ++i)
-    {
-        unsigned int offset = i * width;
-        double valueYPlane0 = static_cast<double>(stepY * i);
-        for (unsigned int j = 0; j < width; ++j)
-        {
-            double valueXPlane0 = static_cast<double>(stepX * j);
-   + j) = static_cast<cl_uchar>(
-                min + valueXPlane0 / 2 + valueYPlane0 / 2 + add);
-        }
-    }
-    // generate UV planes
-    for (unsigned int i = 0; i < height / 2; ++i)
-    {
-        unsigned int offset = width * height + i * width;
-        double valueYPlane1 = static_cast<double>(stepY * i);
-        double valueYPlane2 = static_cast<double>(stepY * (height / 2 + i));
-        for (unsigned int j = 0; j < width / 2; ++j)
-        {
-            double valueXPlane1 = static_cast<double>(stepX * j);
-            double valueXPlane2 = static_cast<double>(stepX * (width / 2 + j));
-   + j * 2) = static_cast<cl_uchar>(
-                min + valueXPlane1 / 2 + valueYPlane1 / 2 + add);
-   + j * 2 + 1) = static_cast<cl_uchar>(
-                min + valueXPlane2 / 2 + valueYPlane2 / 2 + add);
-        }
-    }
-void YUVGenerateYV12(std::vector<cl_uchar> &yuv, unsigned int width,
-                     unsigned int height, cl_uchar valueMin, cl_uchar valueMax,
-                     double valueAdd /*= 0.0*/)
-    yuv.clear();
-    yuv.resize(width * height * 3 / 2, 0);
-    double min = static_cast<double>(valueMin);
-    double max = static_cast<double>(valueMax);
-    double range = 255;
-    double add = static_cast<double>(valueAdd * range);
-    double stepX = (max - min) / static_cast<double>(width);
-    double stepY = (max - min) / static_cast<double>(height);
-    unsigned offset = 0;
-    // generate Y plane
-    for (unsigned int i = 0; i < height; ++i)
-    {
-        unsigned int plane0Offset = offset + i * width;
-        double valueYPlane0 = static_cast<double>(stepY * i);
-        for (unsigned int j = 0; j < width; ++j)
-        {
-            double valueXPlane0 = static_cast<double>(stepX * j);
-   + j) = static_cast<cl_uchar>(
-                min + valueXPlane0 / 2 + valueYPlane0 / 2 + add);
-        }
-    }
-    // generate V plane
-    offset += width * height;
-    for (unsigned int i = 0; i < height / 2; ++i)
-    {
-        unsigned int plane1Offset = offset + i * width / 2;
-        double valueYPlane1 = static_cast<double>(stepY * i);
-        for (unsigned int j = 0; j < width / 2; ++j)
-        {
-            double valueXPlane1 = static_cast<double>(stepX * j);
-   + j) = static_cast<cl_uchar>(
-                min + valueXPlane1 / 2 + valueYPlane1 / 2 + add);
-        }
-    }
-    // generate U plane
-    offset += width * height / 4;
-    for (unsigned int i = 0; i < height / 2; ++i)
-    {
-        unsigned int plane2Offset = offset + i * width / 2;
-        double valueYPlane2 = static_cast<double>(stepY * (height / 2 + i));
-        for (unsigned int j = 0; j < width / 2; ++j)
-        {
-            double valueXPlane2 = static_cast<double>(stepX * j);
-   + j) = static_cast<cl_uchar>(
-                min + valueXPlane2 / 2 + valueYPlane2 / 2 + add);
-        }
-    }
-bool YUVGenerate(TSurfaceFormat surfaceFormat, std::vector<cl_uchar> &yuv,
-                 unsigned int width, unsigned int height, cl_uchar valueMin,
-                 cl_uchar valueMax, double valueAdd /*= 0.0*/)
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_NV12:
-            YUVGenerateNV12(yuv, width, height, valueMin, valueMax, valueAdd);
-            break;
-        case SURFACE_FORMAT_YV12:
-            YUVGenerateYV12(yuv, width, height, valueMin, valueMax, valueAdd);
-            break;
-        default:
-            log_error("YUVGenerate(): Invalid surface type\n");
-            return false;
-            break;
-    }
-    return true;
-bool YUVSurfaceSetNV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       const std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height)
-#if defined(_WIN32)
-    CD3D9SurfaceWrapper *d3dSurface =
-        static_cast<CD3D9SurfaceWrapper *>(surface.get());
-    D3DLOCKED_RECT rect;
-    if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
-    {
-        log_error("YUVSurfaceSetNV12(): Surface lock failed\n");
-        return false;
-    }
-    size_t pitch = rect.Pitch / sizeof(cl_uchar);
-    size_t lineSize = width * sizeof(cl_uchar);
-    cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
-    for (size_t y = 0; y < height; ++y)
-        memcpy(ptr + y * pitch, & * width), lineSize);
-    for (size_t y = 0; y < height / 2; ++y)
-        memcpy(ptr + height * pitch + y * pitch,
-               & * height + y * width), lineSize);
-    (*d3dSurface)->UnlockRect();
-    return true;
-    return false;
-bool YUVSurfaceSetYV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       const std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height)
-#if defined(_WIN32)
-    CD3D9SurfaceWrapper *d3dSurface =
-        static_cast<CD3D9SurfaceWrapper *>(surface.get());
-    D3DLOCKED_RECT rect;
-    if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
-    {
-        log_error("YUVSurfaceSetYV12(): Surface lock failed!\n");
-        return false;
-    }
-    size_t pitch = rect.Pitch / sizeof(cl_uchar);
-    size_t pitchHalf = pitch / 2;
-    size_t lineSize = width * sizeof(cl_uchar);
-    size_t lineHalfSize = lineSize / 2;
-    size_t surfaceOffset = 0;
-    size_t yuvOffset = 0;
-    cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
-    for (size_t y = 0; y < height; ++y)
-        memcpy(ptr + surfaceOffset + y * pitch, & + y * width),
-               lineSize);
-    surfaceOffset += height * pitch;
-    yuvOffset += width * height;
-    for (size_t y = 0; y < height / 2; ++y)
-        memcpy(ptr + surfaceOffset + y * pitchHalf,
-               & + y * lineHalfSize), lineHalfSize);
-    surfaceOffset += pitchHalf * height / 2;
-    yuvOffset += width * height / 4;
-    for (size_t y = 0; y < height / 2; ++y)
-        memcpy(ptr + surfaceOffset + y * pitchHalf,
-               & + y * lineHalfSize), lineHalfSize);
-    (*d3dSurface)->UnlockRect();
-    return true;
-    return false;
-bool YUVSurfaceSet(TSurfaceFormat surfaceFormat,
-                   std::auto_ptr<CSurfaceWrapper> &surface,
-                   const std::vector<cl_uchar> &yuv, unsigned int width,
-                   unsigned int height)
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_NV12:
-            if (!YUVSurfaceSetNV12(surface, yuv, width, height)) return false;
-            break;
-        case SURFACE_FORMAT_YV12:
-            if (!YUVSurfaceSetYV12(surface, yuv, width, height)) return false;
-            break;
-        default:
-            log_error("YUVSurfaceSet(): Invalid surface type!\n");
-            return false;
-            break;
-    }
-    return true;
-bool YUVSurfaceGetNV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height)
-#if defined(_WIN32)
-    CD3D9SurfaceWrapper *d3dSurface =
-        static_cast<CD3D9SurfaceWrapper *>(surface.get());
-    D3DLOCKED_RECT rect;
-    if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
-    {
-        log_error("YUVSurfaceGetNV12(): Surface lock failed!\n");
-        return false;
-    }
-    size_t pitch = rect.Pitch / sizeof(cl_uchar);
-    size_t lineSize = width * sizeof(cl_uchar);
-    cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
-    size_t yuvOffset = 0;
-    size_t surfaceOffset = 0;
-    for (size_t y = 0; y < height; ++y)
-        memcpy(& + y * width), ptr + y * pitch, lineSize);
-    yuvOffset += width * height;
-    surfaceOffset += pitch * height;
-    for (size_t y = 0; y < height / 2; ++y)
-        memcpy(& + y * width), ptr + surfaceOffset + y * pitch,
-               lineSize);
-    (*d3dSurface)->UnlockRect();
-    return true;
-    return false;
-bool YUVSurfaceGetYV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height)
-#if defined(_WIN32)
-    CD3D9SurfaceWrapper *d3dSurface =
-        static_cast<CD3D9SurfaceWrapper *>(surface.get());
-    D3DLOCKED_RECT rect;
-    if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0)))
-    {
-        log_error("YUVSurfaceGetYV12(): Surface lock failed!\n");
-        return false;
-    }
-    size_t pitch = rect.Pitch / sizeof(cl_uchar);
-    size_t pitchHalf = pitch / 2;
-    size_t lineSize = width * sizeof(cl_uchar);
-    size_t lineHalfSize = lineSize / 2;
-    size_t surfaceOffset = 0;
-    size_t yuvOffset = 0;
-    cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
-    for (size_t y = 0; y < height; ++y)
-        memcpy(& + y * width), ptr + surfaceOffset + y * pitch,
-               lineSize);
-    surfaceOffset += pitch * height;
-    yuvOffset += width * height;
-    for (size_t y = 0; y < height / 2; ++y)
-        memcpy(& + y * lineHalfSize),
-               ptr + surfaceOffset + y * pitchHalf, lineHalfSize);
-    surfaceOffset += pitchHalf * height / 2;
-    yuvOffset += width * height / 4;
-    for (size_t y = 0; y < height / 2; ++y)
-        memcpy(& + y * lineHalfSize),
-               ptr + surfaceOffset + y * pitchHalf, lineHalfSize);
-    (*d3dSurface)->UnlockRect();
-    return true;
-    return false;
-bool YUVSurfaceGet(TSurfaceFormat surfaceFormat,
-                   std::auto_ptr<CSurfaceWrapper> &surface,
-                   std::vector<cl_uchar> &yuv, unsigned int width,
-                   unsigned int height)
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_NV12:
-            if (!YUVSurfaceGetNV12(surface, yuv, width, height)) return false;
-            break;
-        case SURFACE_FORMAT_YV12:
-            if (!YUVSurfaceGetYV12(surface, yuv, width, height)) return false;
-            break;
-        default:
-            log_error("YUVSurfaceGet(): Invalid surface type!\n");
-            return false;
-            break;
-    }
-    return true;
-bool YUVCompareNV12(const std::vector<cl_uchar> &yuvTest,
-                    const std::vector<cl_uchar> &yuvRef, unsigned int width,
-                    unsigned int height)
-    // plane 0 verification
-    size_t offset = 0;
-    for (size_t y = 0; y < height; ++y)
-    {
-        size_t plane0Offset = offset + width * y;
-        for (size_t x = 0; x < width; ++x)
-        {
-            if (yuvTest[plane0Offset + x] != yuvRef[plane0Offset + x])
-            {
-                log_error("Plane 0 (Y) is different than expected, reference "
-                          "value: %i, test value: %i, x: %i, y: %i\n",
-                          yuvRef[plane0Offset + x], yuvTest[plane0Offset + x],
-                          x, y);
-                return false;
-            }
-        }
-    }
-    // plane 1 and 2 verification
-    offset += width * height;
-    for (size_t y = 0; y < height / 2; ++y)
-    {
-        size_t plane12Offset = offset + width * y;
-        for (size_t x = 0; x < width / 2; ++x)
-        {
-            if ( + 2 * x)
-                != + 2 * x))
-            {
-                log_error("Plane 1 (U) is different than expected, reference "
-                          "value: %i, test value: %i, x: %i, y: %i\n",
-                          yuvRef[plane12Offset + 2 * x],
-                          yuvTest[plane12Offset + 2 * x], x, y);
-                return false;
-            }
-            if ( + 2 * x + 1)
-                != + 2 * x + 1))
-            {
-                log_error("Plane 2 (V) is different than expected, reference "
-                          "value: %i, test value: %i, x: %i, y: %i\n",
-                          yuvRef[plane12Offset + 2 * x + 1],
-                          yuvTest[plane12Offset + 2 * x + 1], x, y);
-                return false;
-            }
-        }
-    }
-    return true;
-bool YUVCompareYV12(const std::vector<cl_uchar> &yuvTest,
-                    const std::vector<cl_uchar> &yuvRef, unsigned int width,
-                    unsigned int height)
-    // plane 0 verification
-    size_t offset = 0;
-    for (size_t y = 0; y < height; ++y)
-    {
-        size_t plane0Offset = width * y;
-        for (size_t x = 0; x < width; ++x)
-        {
-            if ( + x) != + x))
-            {
-                log_error("Plane 0 (Y) is different than expected, reference "
-                          "value: %i, test value: %i, x: %i, y: %i\n",
-                          yuvRef[plane0Offset + x], yuvTest[plane0Offset + x],
-                          x, y);
-                return false;
-            }
-        }
-    }
-    // plane 1 verification
-    offset += width * height;
-    for (size_t y = 0; y < height / 2; ++y)
-    {
-        size_t plane1Offset = offset + width * y / 2;
-        for (size_t x = 0; x < width / 2; ++x)
-        {
-            if ( + x) != + x))
-            {
-                log_error("Plane 1 (V) is different than expected, reference "
-                          "value: %i, test value: %i, x: %i, y: %i\n",
-                          yuvRef[plane1Offset + x], yuvTest[plane1Offset + x],
-                          x, y);
-                return false;
-            }
-        }
-    }
-    // plane 2 verification
-    offset += width * height / 4;
-    for (size_t y = 0; y < height / 2; ++y)
-    {
-        size_t plane2Offset = offset + width * y / 2;
-        for (size_t x = 0; x < width / 2; ++x)
-        {
-            if ( + x) != + x))
-            {
-                log_error("Plane 2 (U) is different than expected, reference "
-                          "value: %i, test value: %i, x: %i, y: %i\n",
-                          yuvRef[plane2Offset + x], yuvTest[plane2Offset + x],
-                          x, y);
-                return false;
-            }
-        }
-    }
-    return true;
-bool YUVCompare(TSurfaceFormat surfaceFormat,
-                const std::vector<cl_uchar> &yuvTest,
-                const std::vector<cl_uchar> &yuvRef, unsigned int width,
-                unsigned int height)
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_NV12:
-            if (!YUVCompareNV12(yuvTest, yuvRef, width, height))
-            {
-                log_error("OCL object is different than expected!\n");
-                return false;
-            }
-            break;
-        case SURFACE_FORMAT_YV12:
-            if (!YUVCompareYV12(yuvTest, yuvRef, width, height))
-            {
-                log_error("OCL object is different than expected!\n");
-                return false;
-            }
-            break;
-        default:
-            log_error("YUVCompare(): Invalid surface type!\n");
-            return false;
-            break;
-    }
-    return true;
-void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                  std::vector<float> &data, unsigned int width,
-                  unsigned int height, unsigned int channelNum,
-                  float cmin /*= 0.0f*/, float cmax /*= 1.0f*/,
-                  float add /*= 0.0f*/)
-    data.clear();
-    data.reserve(width * height * channelNum);
-    double valueMin = static_cast<double>(cmin);
-    double valueMax = static_cast<double>(cmax);
-    double stepX = (valueMax - valueMin) / static_cast<double>(width);
-    double stepY = (valueMax - valueMin) / static_cast<double>(height);
-    double valueAdd = static_cast<double>(add);
-    for (unsigned int i = 0; i < height; ++i)
-    {
-        double valueY = static_cast<double>(stepY * i);
-        for (unsigned int j = 0; j < width; ++j)
-        {
-            double valueX = static_cast<double>(stepX * j);
-            switch (channelNum)
-            {
-                case 1:
-                    data.push_back(static_cast<float>(valueMin + valueX / 2
-                                                      + valueY / 2 + valueAdd));
-                    break;
-                case 2:
-                    data.push_back(
-                        static_cast<float>(valueMin + valueX + valueAdd));
-                    data.push_back(
-                        static_cast<float>(valueMin + valueY + valueAdd));
-                    break;
-                case 4:
-                    data.push_back(
-                        static_cast<float>(valueMin + valueX + valueAdd));
-                    data.push_back(
-                        static_cast<float>(valueMin + valueY + valueAdd));
-                    data.push_back(
-                        static_cast<float>(valueMin + valueX / 2 + valueAdd));
-                    data.push_back(
-                        static_cast<float>(valueMin + valueY / 2 + valueAdd));
-                    break;
-                default:
-                    log_error("DataGenerate(): invalid channel number!");
-                    return;
-                    break;
-            }
-        }
-    }
-void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                  std::vector<cl_half> &data, unsigned int width,
-                  unsigned int height, unsigned int channelNum,
-                  float cmin /*= 0.0f*/, float cmax /*= 1.0f*/,
-                  float add /*= 0.0f*/)
-    data.clear();
-    data.reserve(width * height * channelNum);
-    double valueMin = static_cast<double>(cmin);
-    double valueMax = static_cast<double>(cmax);
-    double stepX = (valueMax - valueMin) / static_cast<double>(width);
-    double stepY = (valueMax - valueMin) / static_cast<double>(height);
-    switch (type)
-    {
-        case CL_HALF_FLOAT: {
-            double valueAdd = static_cast<double>(add);
-            for (unsigned int i = 0; i < height; ++i)
-            {
-                double valueY = static_cast<double>(stepY * i);
-                for (unsigned int j = 0; j < width; ++j)
-                {
-                    double valueX = static_cast<double>(stepX * j);
-                    switch (channelNum)
-                    {
-                        case 1:
-                            data.push_back(convert_float_to_half(
-                                static_cast<float>(valueMin + valueX / 2
-                                                   + valueY / 2 + valueAdd)));
-                            break;
-                        case 2:
-                            data.push_back(
-                                convert_float_to_half(static_cast<float>(
-                                    valueMin + valueX + valueAdd)));
-                            data.push_back(
-                                convert_float_to_half(static_cast<float>(
-                                    valueMin + valueY + valueAdd)));
-                            break;
-                        case 4:
-                            data.push_back(
-                                convert_float_to_half(static_cast<float>(
-                                    valueMin + valueX + valueAdd)));
-                            data.push_back(
-                                convert_float_to_half(static_cast<float>(
-                                    valueMin + valueY + valueAdd)));
-                            data.push_back(
-                                convert_float_to_half(static_cast<float>(
-                                    valueMin + valueX / 2 + valueAdd)));
-                            data.push_back(
-                                convert_float_to_half(static_cast<float>(
-                                    valueMin + valueY / 2 + valueAdd)));
-                            break;
-                        default:
-                            log_error(
-                                "DataGenerate(): invalid channel number!");
-                            return;
-                            break;
-                    }
-                }
-            }
-            break;
-        }
-        case CL_UNORM_INT16: {
-            double range = 65535;
-            double valueAdd = static_cast<double>(add * range);
-            for (unsigned int i = 0; i < height; ++i)
-            {
-                double valueY = static_cast<double>(stepY * i * range);
-                for (unsigned int j = 0; j < width; ++j)
-                {
-                    double valueX = static_cast<double>(stepX * j * range);
-                    switch (channelNum)
-                    {
-                        case 1:
-                            data.push_back(static_cast<cl_ushort>(
-                                valueMin + valueX / 2 + valueY / 2 + valueAdd));
-                            break;
-                        case 2:
-                            data.push_back(static_cast<cl_ushort>(
-                                valueMin + valueX + valueAdd));
-                            data.push_back(static_cast<cl_ushort>(
-                                valueMin + valueY + valueAdd));
-                            break;
-                        case 4:
-                            data.push_back(static_cast<cl_ushort>(
-                                valueMin + valueX + valueAdd));
-                            data.push_back(static_cast<cl_ushort>(
-                                valueMin + valueY + valueAdd));
-                            data.push_back(static_cast<cl_ushort>(
-                                valueMin + valueX / 2 + valueAdd));
-                            data.push_back(static_cast<cl_ushort>(
-                                valueMin + valueY / 2 + valueAdd));
-                            break;
-                        default:
-                            log_error(
-                                "DataGenerate(): invalid channel number!");
-                            return;
-                            break;
-                    }
-                }
-            }
-        }
-        break;
-        default:
-            log_error("DataGenerate(): unknown data type!");
-            return;
-            break;
-    }
-void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                  std::vector<cl_uchar> &data, unsigned int width,
-                  unsigned int height, unsigned int channelNum,
-                  float cmin /*= 0.0f*/, float cmax /*= 1.0f*/,
-                  float add /*= 0.0f*/)
-    data.clear();
-    data.reserve(width * height * channelNum);
-    double valueMin = static_cast<double>(cmin);
-    double valueMax = static_cast<double>(cmax);
-    double stepX = (valueMax - valueMin) / static_cast<double>(width);
-    double stepY = (valueMax - valueMin) / static_cast<double>(height);
-    double range = 255;
-    double valueAdd = static_cast<double>(add * range);
-    for (unsigned int i = 0; i < height; ++i)
-    {
-        double valueY = static_cast<double>(stepY * i * range);
-        for (unsigned int j = 0; j < width; ++j)
-        {
-            double valueX = static_cast<double>(stepX * j * range);
-            switch (channelNum)
-            {
-                case 1:
-                    data.push_back(static_cast<cl_uchar>(
-                        valueMin + valueX / 2 + valueY / 2 + valueAdd));
-                    break;
-                case 2:
-                    data.push_back(
-                        static_cast<cl_uchar>(valueMin + valueX + valueAdd));
-                    data.push_back(
-                        static_cast<cl_uchar>(valueMin + valueY + valueAdd));
-                    break;
-                case 4:
-                    data.push_back(
-                        static_cast<cl_uchar>(valueMin + valueX + valueAdd));
-                    data.push_back(
-                        static_cast<cl_uchar>(valueMin + valueY + valueAdd));
-                    data.push_back(static_cast<cl_uchar>(valueMin + valueX / 2
-                                                         + valueAdd));
-                    if (surfaceFormat == SURFACE_FORMAT_X8R8G8B8)
-                        data.push_back(static_cast<cl_uchar>(0xff));
-                    else
-                        data.push_back(static_cast<cl_uchar>(
-                            valueMin + valueY / 2 + valueAdd));
-                    break;
-                default:
-                    log_error("DataGenerate(): invalid channel number!");
-                    return;
-                    break;
-            }
-        }
-    }
-bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                 const std::vector<float> &dataTest,
-                 const std::vector<float> &dataExp, unsigned int width,
-                 unsigned int height, unsigned int channelNum)
-    float epsilon = 0.000001f;
-    for (unsigned int i = 0; i < height; ++i)
-    {
-        unsigned int offset = i * width * channelNum;
-        for (unsigned int j = 0; j < width; ++j)
-        {
-            for (unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx)
-            {
-                if (abs( + j * channelNum + planeIdx)
-                        - + j * channelNum + planeIdx))
-                    > epsilon)
-                {
-                    log_error(
-                        "Tested image is different than reference (x,y,plane) "
-                        "= (%i,%i,%i), test value = %f, expected value = %f\n",
-                        j, i, planeIdx,
-                        dataTest[offset + j * channelNum + planeIdx],
-                        dataExp[offset + j * channelNum + planeIdx]);
-                    return false;
-                }
-            }
-        }
-    }
-    return true;
-bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                 const std::vector<cl_half> &dataTest,
-                 const std::vector<cl_half> &dataExp, unsigned int width,
-                 unsigned int height, unsigned int channelNum)
-    switch (type)
-    {
-        case CL_HALF_FLOAT: {
-            float epsilon = 0.001f;
-            for (unsigned int i = 0; i < height; ++i)
-            {
-                unsigned int offset = i * width * channelNum;
-                for (unsigned int j = 0; j < width; ++j)
-                {
-                    for (unsigned planeIdx = 0; planeIdx < channelNum;
-                         ++planeIdx)
-                    {
-                        float test = cl_half_to_float(
-                   + j * channelNum + planeIdx));
-                        float ref = cl_half_to_float(
-                   + j * channelNum + planeIdx));
-                        if (abs(test - ref) > epsilon)
-                        {
-                            log_error("Tested image is different than "
-                                      "reference (x,y,plane) = "
-                                      "(%i,%i,%i), test value = %f, expected "
-                                      "value = %f\n",
-                                      j, i, planeIdx, test, ref);
-                            return false;
-                        }
-                    }
-                }
-            }
-        }
-        break;
-        case CL_UNORM_INT16: {
-            cl_ushort epsilon = 1;
-            for (unsigned int i = 0; i < height; ++i)
-            {
-                unsigned int offset = i * width * channelNum;
-                for (unsigned int j = 0; j < width; ++j)
-                {
-                    for (unsigned planeIdx = 0; planeIdx < channelNum;
-                         ++planeIdx)
-                    {
-                        cl_ushort test =
-                   + j * channelNum + planeIdx);
-                        cl_ushort ref =
-                   + j * channelNum + planeIdx);
-                        if (abs(test - ref) > epsilon)
-                        {
-                            log_error("Tested image is different than "
-                                      "reference (x,y,plane) = (%i,%i,%i), "
-                                      "test value = %i, expected value = %i\n",
-                                      j, i, planeIdx, test, ref);
-                            return false;
-                        }
-                    }
-                }
-            }
-        }
-        break;
-        default:
-            log_error("DataCompare(): Invalid data format!");
-            return false;
-            break;
-    }
-    return true;
-bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                 const std::vector<cl_uchar> &dataTest,
-                 const std::vector<cl_uchar> &dataExp, unsigned int width,
-                 unsigned int height, unsigned int planeNum)
-    for (unsigned int i = 0; i < height; ++i)
-    {
-        unsigned int offset = i * width * planeNum;
-        for (unsigned int j = 0; j < width; ++j)
-        {
-            for (unsigned planeIdx = 0; planeIdx < planeNum; ++planeIdx)
-            {
-                if (surfaceFormat == SURFACE_FORMAT_X8R8G8B8 && planeIdx == 3)
-                    continue;
-                cl_uchar test = + j * planeNum + planeIdx);
-                cl_uchar ref = + j * planeNum + planeIdx);
-                if (test != ref)
-                {
-                    log_error(
-                        "Tested image is different than reference (x,y,plane) "
-                        "= (%i,%i,%i), test value = %i, expected value = %i\n",
-                        j, i, planeIdx, test, ref);
-                    return false;
-                }
-            }
-        }
-    }
-    return true;
-bool GetImageInfo(cl_mem object, cl_image_format formatExp,
-                  size_t elementSizeExp, size_t rowPitchExp,
-                  size_t slicePitchExp, size_t widthExp, size_t heightExp,
-                  size_t depthExp, unsigned int planeExp)
-    bool result = true;
-    cl_image_format format;
-    if (clGetImageInfo(object, CL_IMAGE_FORMAT, sizeof(cl_image_format),
-                       &format, 0)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_FORMAT) failed\n");
-        result = false;
-    }
-    if (formatExp.image_channel_order != format.image_channel_order
-        || formatExp.image_channel_data_type != format.image_channel_data_type)
-    {
-        log_error("Value of CL_IMAGE_FORMAT is different than expected\n");
-        result = false;
-    }
-    size_t elementSize = 0;
-    if (clGetImageInfo(object, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t),
-                       &elementSize, 0)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_ELEMENT_SIZE) failed\n");
-        result = false;
-    }
-    if (elementSizeExp != elementSize)
-    {
-        log_error("Value of CL_IMAGE_ELEMENT_SIZE is different than expected "
-                  "(size: %i, exp size: %i)\n",
-                  elementSize, elementSizeExp);
-        result = false;
-    }
-    size_t rowPitch = 0;
-    if (clGetImageInfo(object, CL_IMAGE_ROW_PITCH, sizeof(size_t), &rowPitch, 0)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_ROW_PITCH) failed\n");
-        result = false;
-    }
-    if ((rowPitchExp == 0 && rowPitchExp != rowPitch)
-        || (rowPitchExp > 0 && rowPitchExp > rowPitch))
-    {
-        log_error("Value of CL_IMAGE_ROW_PITCH is different than expected "
-                  "(size: %i, exp size: %i)\n",
-                  rowPitch, rowPitchExp);
-        result = false;
-    }
-    size_t slicePitch = 0;
-    if (clGetImageInfo(object, CL_IMAGE_SLICE_PITCH, sizeof(size_t),
-                       &slicePitch, 0)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_SLICE_PITCH) failed\n");
-        result = false;
-    }
-    if ((slicePitchExp == 0 && slicePitchExp != slicePitch)
-        || (slicePitchExp > 0 && slicePitchExp > slicePitch))
-    {
-        log_error("Value of CL_IMAGE_SLICE_PITCH is different than expected "
-                  "(size: %i, exp size: %i)\n",
-                  slicePitch, slicePitchExp);
-        result = false;
-    }
-    size_t width = 0;
-    if (clGetImageInfo(object, CL_IMAGE_WIDTH, sizeof(size_t), &width, 0)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_WIDTH) failed\n");
-        result = false;
-    }
-    if (widthExp != width)
-    {
-        log_error("Value of CL_IMAGE_WIDTH is different than expected (size: "
-                  "%i, exp size: %i)\n",
-                  width, widthExp);
-        result = false;
-    }
-    size_t height = 0;
-    if (clGetImageInfo(object, CL_IMAGE_HEIGHT, sizeof(size_t), &height, 0)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_HEIGHT) failed\n");
-        result = false;
-    }
-    if (heightExp != height)
-    {
-        log_error("Value of CL_IMAGE_HEIGHT is different than expected (size: "
-                  "%i, exp size: %i)\n",
-                  height, heightExp);
-        result = false;
-    }
-    size_t depth = 0;
-    if (clGetImageInfo(object, CL_IMAGE_DEPTH, sizeof(size_t), &depth, 0)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_DEPTH) failed\n");
-        result = false;
-    }
-    if (depthExp != depth)
-    {
-        log_error("Value of CL_IMAGE_DEPTH is different than expected (size: "
-                  "%i, exp size: %i)\n",
-                  depth, depthExp);
-        result = false;
-    }
-    unsigned int plane = 99;
-    size_t paramSize = 0;
-    if (clGetImageInfo(object, CL_IMAGE_DX9_MEDIA_PLANE_KHR,
-                       sizeof(unsigned int), &plane, &paramSize)
-        != CL_SUCCESS)
-    {
-        log_error("clGetImageInfo(CL_IMAGE_MEDIA_SURFACE_PLANE_KHR) failed\n");
-        result = false;
-    }
-    if (planeExp != plane)
-    {
-        log_error("Value of CL_IMAGE_MEDIA_SURFACE_PLANE_KHR is different than "
-                  "expected (plane: %i, exp plane: %i)\n",
-                  plane, planeExp);
-        result = false;
-    }
-    return result;
-bool GetMemObjInfo(cl_mem object, cl_dx9_media_adapter_type_khr adapterType,
-                   std::auto_ptr<CSurfaceWrapper> &surface,
-                   void *shareHandleExp)
-    bool result = true;
-    switch (adapterType)
-    {
-        case CL_ADAPTER_D3D9_KHR:
-        case CL_ADAPTER_D3D9EX_KHR:
-        case CL_ADAPTER_DXVA_KHR: {
-#if defined(_WIN32)
-            cl_dx9_surface_info_khr surfaceInfo;
-            void *surfaceInfo = 0;
-            return false;
-            size_t paramSize = 0;
-            if (clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR,
-                                   sizeof(surfaceInfo), &surfaceInfo,
-                                   &paramSize)
-                != CL_SUCCESS)
-            {
-                log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR) "
-                          "failed\n");
-                result = false;
-            }
-#if defined(_WIN32)
-            CD3D9SurfaceWrapper *d3d9Surface =
-                static_cast<CD3D9SurfaceWrapper *>(surface.get());
-            if (*d3d9Surface != surfaceInfo.resource)
-            {
-                log_error(
-                    "Invalid resource for CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n");
-                result = false;
-            }
-            if (shareHandleExp != surfaceInfo.shared_handle)
-            {
-                log_error("Invalid shared handle for "
-                          "CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n");
-                result = false;
-            }
-            return false;
-            if (paramSize != sizeof(surfaceInfo))
-            {
-                log_error("Invalid CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR parameter "
-                          "size: %i, expected: %i\n",
-                          paramSize, sizeof(surfaceInfo));
-                result = false;
-            }
-            paramSize = 0;
-            cl_dx9_media_adapter_type_khr mediaAdapterType;
-            if (clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR,
-                                   sizeof(mediaAdapterType), &mediaAdapterType,
-                                   &paramSize)
-                != CL_SUCCESS)
-            {
-                log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR) "
-                          "failed\n");
-                result = false;
-            }
-            if (adapterType != mediaAdapterType)
-            {
-                log_error("Invalid media adapter type for "
-                          "CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR\n");
-                result = false;
-            }
-            if (paramSize != sizeof(mediaAdapterType))
-            {
-                log_error("Invalid CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR parameter "
-                          "size: %i, expected: %i\n",
-                          paramSize, sizeof(mediaAdapterType));
-                result = false;
-            }
-        }
-        break;
-        default:
-            log_error("GetMemObjInfo(): Unknown adapter type!\n");
-            return false;
-            break;
-    }
-    return result;
-bool ImageInfoVerify(cl_dx9_media_adapter_type_khr adapterType,
-                     const std::vector<cl_mem> &memObjList, unsigned int width,
-                     unsigned int height,
-                     std::auto_ptr<CSurfaceWrapper> &surface,
-                     void *sharedHandle)
-    if (memObjList.size() != 2 && memObjList.size() != 3)
-    {
-        log_error("ImageInfoVerify(): Invalid object list parameter\n");
-        return false;
-    }
-    cl_image_format formatPlane;
-    formatPlane.image_channel_data_type = CL_UNORM_INT8;
-    formatPlane.image_channel_order = CL_R;
-    // plane 0 verification
-    if (!GetImageInfo(memObjList[0], formatPlane, sizeof(cl_uchar),
-                      width * sizeof(cl_uchar), 0, width, height, 0, 0))
-    {
-        log_error("clGetImageInfo failed\n");
-        return false;
-    }
-    switch (memObjList.size())
-    {
-        case 2: {
-            formatPlane.image_channel_data_type = CL_UNORM_INT8;
-            formatPlane.image_channel_order = CL_RG;
-            if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar) * 2,
-                              width * sizeof(cl_uchar), 0, width / 2,
-                              height / 2, 0, 1))
-            {
-                log_error("clGetImageInfo failed\n");
-                return false;
-            }
-        }
-        break;
-        case 3: {
-            if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar),
-                              width * sizeof(cl_uchar) / 2, 0, width / 2,
-                              height / 2, 0, 1))
-            {
-                log_error("clGetImageInfo failed\n");
-                return false;
-            }
-            if (!GetImageInfo(memObjList[2], formatPlane, sizeof(cl_uchar),
-                              width * sizeof(cl_uchar) / 2, 0, width / 2,
-                              height / 2, 0, 2))
-            {
-                log_error("clGetImageInfo failed\n");
-                return false;
-            }
-        }
-        break;
-        default:
-            log_error("ImageInfoVerify(): Invalid object list parameter\n");
-            return false;
-            break;
-    }
-    for (size_t i = 0; i < memObjList.size(); ++i)
-    {
-        if (!GetMemObjInfo(memObjList[i], adapterType, surface, sharedHandle))
-        {
-            log_error("clGetMemObjInfo(%i) failed\n", i);
-            return false;
-        }
-    }
-    return true;
-bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType,
-                      const cl_image_format imageFormatCheck)
-    cl_uint imageFormatsNum = 0;
-    cl_int error = clGetSupportedImageFormats(
-        context, CL_MEM_READ_WRITE, imageType, 0, 0, &imageFormatsNum);
-    if (error != CL_SUCCESS)
-    {
-        log_error("clGetSupportedImageFormats failed\n");
-        return false;
-    }
-    if (imageFormatsNum < 1)
-    {
-        log_error("Invalid image format number returned by "
-                  "clGetSupportedImageFormats\n");
-        return false;
-    }
-    std::vector<cl_image_format> imageFormats(imageFormatsNum);
-    error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, imageType,
-                                       imageFormatsNum, &imageFormats[0], 0);
-    if (error != CL_SUCCESS)
-    {
-        log_error("clGetSupportedImageFormats failed\n");
-        return false;
-    }
-    for (cl_uint i = 0; i < imageFormatsNum; ++i)
-    {
-        if (imageFormats[i].image_channel_data_type
-                == imageFormatCheck.image_channel_data_type
-            && imageFormats[i].image_channel_order
-                == imageFormatCheck.image_channel_order)
-        {
-            return true;
-        }
-    }
-    return false;
-unsigned int ChannelNum(TSurfaceFormat surfaceFormat)
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_R32F:
-        case SURFACE_FORMAT_R16F:
-        case SURFACE_FORMAT_L16:
-        case SURFACE_FORMAT_A8:
-        case SURFACE_FORMAT_L8: return 1; break;
-        case SURFACE_FORMAT_G32R32F:
-        case SURFACE_FORMAT_G16R16F:
-        case SURFACE_FORMAT_G16R16:
-        case SURFACE_FORMAT_A8L8: return 2; break;
-        case SURFACE_FORMAT_NV12:
-        case SURFACE_FORMAT_YV12: return 3; break;
-        case SURFACE_FORMAT_A32B32G32R32F:
-        case SURFACE_FORMAT_A16B16G16R16F:
-        case SURFACE_FORMAT_A16B16G16R16:
-        case SURFACE_FORMAT_A8B8G8R8:
-        case SURFACE_FORMAT_X8B8G8R8:
-        case SURFACE_FORMAT_A8R8G8B8:
-        case SURFACE_FORMAT_X8R8G8B8: return 4; break;
-        default:
-            log_error("ChannelNum(): unknown surface format!\n");
-            return 0;
-            break;
-    }
-unsigned int PlanesNum(TSurfaceFormat surfaceFormat)
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_R32F:
-        case SURFACE_FORMAT_R16F:
-        case SURFACE_FORMAT_L16:
-        case SURFACE_FORMAT_A8:
-        case SURFACE_FORMAT_L8:
-        case SURFACE_FORMAT_G32R32F:
-        case SURFACE_FORMAT_G16R16F:
-        case SURFACE_FORMAT_G16R16:
-        case SURFACE_FORMAT_A8L8:
-        case SURFACE_FORMAT_A32B32G32R32F:
-        case SURFACE_FORMAT_A16B16G16R16F:
-        case SURFACE_FORMAT_A16B16G16R16:
-        case SURFACE_FORMAT_A8B8G8R8:
-        case SURFACE_FORMAT_X8B8G8R8:
-        case SURFACE_FORMAT_A8R8G8B8:
-        case SURFACE_FORMAT_X8R8G8B8: return 1; break;
-        case SURFACE_FORMAT_NV12: return 2; break;
-        case SURFACE_FORMAT_YV12: return 3; break;
-        default:
-            log_error("PlanesNum(): unknown surface format!\n");
-            return 0;
-            break;
-    }
-#if defined(_WIN32)
-D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat)
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_R32F: return D3DFMT_R32F; break;
-        case SURFACE_FORMAT_R16F: return D3DFMT_R16F; break;
-        case SURFACE_FORMAT_L16: return D3DFMT_L16; break;
-        case SURFACE_FORMAT_A8: return D3DFMT_A8; break;
-        case SURFACE_FORMAT_L8: return D3DFMT_L8; break;
-        case SURFACE_FORMAT_G32R32F: return D3DFMT_G32R32F; break;
-        case SURFACE_FORMAT_G16R16F: return D3DFMT_G16R16F; break;
-        case SURFACE_FORMAT_G16R16: return D3DFMT_G16R16; break;
-        case SURFACE_FORMAT_A8L8: return D3DFMT_A8L8; break;
-        case SURFACE_FORMAT_A32B32G32R32F: return D3DFMT_A32B32G32R32F; break;
-        case SURFACE_FORMAT_A16B16G16R16F: return D3DFMT_A16B16G16R16F; break;
-        case SURFACE_FORMAT_A16B16G16R16: return D3DFMT_A16B16G16R16; break;
-        case SURFACE_FORMAT_A8B8G8R8: return D3DFMT_A8B8G8R8; break;
-        case SURFACE_FORMAT_X8B8G8R8: return D3DFMT_X8B8G8R8; break;
-        case SURFACE_FORMAT_A8R8G8B8: return D3DFMT_A8R8G8B8; break;
-        case SURFACE_FORMAT_X8R8G8B8: return D3DFMT_X8R8G8B8; break;
-        case SURFACE_FORMAT_NV12:
-            return static_cast<D3DFORMAT>(MAKEFOURCC('N', 'V', '1', '2'));
-            break;
-        case SURFACE_FORMAT_YV12:
-            return static_cast<D3DFORMAT>(MAKEFOURCC('Y', 'V', '1', '2'));
-            break;
-        default:
-            log_error("SurfaceFormatToD3D(): unknown surface format!\n");
-            return D3DFMT_R32F;
-            break;
-    }
-bool DeviceCreate(cl_dx9_media_adapter_type_khr adapterType,
-                  std::auto_ptr<CDeviceWrapper> &device)
-    switch (adapterType)
-    {
-#if defined(_WIN32)
-        case CL_ADAPTER_D3D9_KHR:
-            device = std::auto_ptr<CDeviceWrapper>(new CD3D9Wrapper());
-            break;
-        case CL_ADAPTER_D3D9EX_KHR:
-            device = std::auto_ptr<CDeviceWrapper>(new CD3D9ExWrapper());
-            break;
-        case CL_ADAPTER_DXVA_KHR:
-            device = std::auto_ptr<CDeviceWrapper>(new CDXVAWrapper());
-            break;
-        default:
-            log_error("DeviceCreate(): Unknown adapter type!\n");
-            return false;
-            break;
-    }
-    return device->Status();
-bool SurfaceFormatCheck(cl_dx9_media_adapter_type_khr adapterType,
-                        const CDeviceWrapper &device,
-                        TSurfaceFormat surfaceFormat)
-    switch (adapterType)
-    {
-#if defined(_WIN32)
-        case CL_ADAPTER_D3D9_KHR:
-        case CL_ADAPTER_D3D9EX_KHR:
-        case CL_ADAPTER_DXVA_KHR: {
-            D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
-            LPDIRECT3D9 d3d9 = static_cast<LPDIRECT3D9>(device.D3D());
-            D3DDISPLAYMODE d3ddm;
-            d3d9->GetAdapterDisplayMode(device.AdapterIdx(), &d3ddm);
-            if (FAILED(d3d9->CheckDeviceFormat(D3DADAPTER_DEFAULT,
-                                               D3DDEVTYPE_HAL, d3ddm.Format, 0,
-                                               D3DRTYPE_SURFACE, d3dFormat)))
-                return false;
-        }
-        break;
-        default:
-            log_error("SurfaceFormatCheck(): Unknown adapter type!\n");
-            return false;
-            break;
-    }
-    return true;
-bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format)
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_R32F:
-            format.image_channel_order = CL_R;
-            format.image_channel_data_type = CL_FLOAT;
-            break;
-        case SURFACE_FORMAT_R16F:
-            format.image_channel_order = CL_R;
-            format.image_channel_data_type = CL_HALF_FLOAT;
-            break;
-        case SURFACE_FORMAT_L16:
-            format.image_channel_order = CL_R;
-            format.image_channel_data_type = CL_UNORM_INT16;
-            break;
-        case SURFACE_FORMAT_A8:
-            format.image_channel_order = CL_A;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_L8:
-            format.image_channel_order = CL_R;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_G32R32F:
-            format.image_channel_order = CL_RG;
-            format.image_channel_data_type = CL_FLOAT;
-            break;
-        case SURFACE_FORMAT_G16R16F:
-            format.image_channel_order = CL_RG;
-            format.image_channel_data_type = CL_HALF_FLOAT;
-            break;
-        case SURFACE_FORMAT_G16R16:
-            format.image_channel_order = CL_RG;
-            format.image_channel_data_type = CL_UNORM_INT16;
-            break;
-        case SURFACE_FORMAT_A8L8:
-            format.image_channel_order = CL_RG;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_A32B32G32R32F:
-            format.image_channel_order = CL_RGBA;
-            format.image_channel_data_type = CL_FLOAT;
-            break;
-        case SURFACE_FORMAT_A16B16G16R16F:
-            format.image_channel_order = CL_RGBA;
-            format.image_channel_data_type = CL_HALF_FLOAT;
-            break;
-        case SURFACE_FORMAT_A16B16G16R16:
-            format.image_channel_order = CL_RGBA;
-            format.image_channel_data_type = CL_UNORM_INT16;
-            break;
-        case SURFACE_FORMAT_A8B8G8R8:
-            format.image_channel_order = CL_RGBA;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_X8B8G8R8:
-            format.image_channel_order = CL_RGBA;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_A8R8G8B8:
-            format.image_channel_order = CL_BGRA;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_X8R8G8B8:
-            format.image_channel_order = CL_BGRA;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_NV12:
-            format.image_channel_order = CL_R;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        case SURFACE_FORMAT_YV12:
-            format.image_channel_order = CL_R;
-            format.image_channel_data_type = CL_UNORM_INT8;
-            break;
-        default:
-            log_error("SurfaceFormatToOCL(): Unknown surface format!\n");
-            return false;
-            break;
-    }
-    return true;
-void SurfaceFormatToString(TSurfaceFormat surfaceFormat, std::string &str)
-    switch (surfaceFormat)
-    {
-        case SURFACE_FORMAT_R32F: str = "R32F"; break;
-        case SURFACE_FORMAT_R16F: str = "R16F"; break;
-        case SURFACE_FORMAT_L16: str = "L16"; break;
-        case SURFACE_FORMAT_A8: str = "A8"; break;
-        case SURFACE_FORMAT_L8: str = "L8"; break;
-        case SURFACE_FORMAT_G32R32F: str = "G32R32F"; break;
-        case SURFACE_FORMAT_G16R16F: str = "G16R16F"; break;
-        case SURFACE_FORMAT_G16R16: str = "G16R16"; break;
-        case SURFACE_FORMAT_A8L8: str = "A8L8"; break;
-        case SURFACE_FORMAT_A32B32G32R32F: str = "A32B32G32R32F"; break;
-        case SURFACE_FORMAT_A16B16G16R16F: str = "A16B16G16R16F"; break;
-        case SURFACE_FORMAT_A16B16G16R16: str = "A16B16G16R16"; break;
-        case SURFACE_FORMAT_A8B8G8R8: str = "A8B8G8R8"; break;
-        case SURFACE_FORMAT_X8B8G8R8: str = "X8B8G8R8"; break;
-        case SURFACE_FORMAT_A8R8G8B8: str = "A8R8G8B8"; break;
-        case SURFACE_FORMAT_X8R8G8B8: str = "X8R8G8B8"; break;
-        case SURFACE_FORMAT_NV12: str = "NV12"; break;
-        case SURFACE_FORMAT_YV12: str = "YV12"; break;
-        default:
-            log_error("SurfaceFormatToString(): unknown surface format!\n");
-            str = "unknown";
-            break;
-    }
-bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType,
-                        unsigned int width, unsigned int height,
-                        TSurfaceFormat surfaceFormat, CDeviceWrapper &device,
-                        std::auto_ptr<CSurfaceWrapper> &surface,
-                        bool sharedHandle, void **objectSharedHandle)
-    switch (adapterType)
-    {
-#if defined(_WIN32)
-        case CL_ADAPTER_D3D9_KHR: {
-            surface =
-                std::auto_ptr<CD3D9SurfaceWrapper>(new CD3D9SurfaceWrapper);
-            CD3D9SurfaceWrapper *d3dSurface =
-                static_cast<CD3D9SurfaceWrapper *>(surface.get());
-            HRESULT hr = 0;
-            D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
-            LPDIRECT3DDEVICE9 d3d9Device = (LPDIRECT3DDEVICE9)device.Device();
-            hr = d3d9Device->CreateOffscreenPlainSurface(
-                width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface),
-                sharedHandle ? objectSharedHandle : 0);
-            if (FAILED(hr))
-            {
-                log_error("CreateOffscreenPlainSurface failed\n");
-                return false;
-            }
-        }
-        break;
-        case CL_ADAPTER_D3D9EX_KHR: {
-            surface =
-                std::auto_ptr<CD3D9SurfaceWrapper>(new CD3D9SurfaceWrapper);
-            CD3D9SurfaceWrapper *d3dSurface =
-                static_cast<CD3D9SurfaceWrapper *>(surface.get());
-            HRESULT hr = 0;
-            D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
-            LPDIRECT3DDEVICE9EX d3d9ExDevice =
-                (LPDIRECT3DDEVICE9EX)device.Device();
-            hr = d3d9ExDevice->CreateOffscreenPlainSurface(
-                width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface),
-                sharedHandle ? objectSharedHandle : 0);
-            if (FAILED(hr))
-            {
-                log_error("CreateOffscreenPlainSurface failed\n");
-                return false;
-            }
-        }
-        break;
-        case CL_ADAPTER_DXVA_KHR: {
-            surface =
-                std::auto_ptr<CD3D9SurfaceWrapper>(new CD3D9SurfaceWrapper);
-            CD3D9SurfaceWrapper *d3dSurface =
-                static_cast<CD3D9SurfaceWrapper *>(surface.get());
-            HRESULT hr = 0;
-            D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
-            IDXVAHD_Device *dxvaDevice = (IDXVAHD_Device *)device.Device();
-            hr = dxvaDevice->CreateVideoSurface(
-                width, height, d3dFormat, D3DPOOL_DEFAULT, 0,
-                DXVAHD_SURFACE_TYPE_VIDEO_INPUT, 1, &(*d3dSurface),
-                sharedHandle ? objectSharedHandle : 0);
-            if (FAILED(hr))
-            {
-                log_error("CreateVideoSurface failed\n");
-                return false;
-            }
-        }
-        break;
-        default:
-            log_error("MediaSurfaceCreate(): Unknown adapter type!\n");
-            return false;
-            break;
-    }
-    return true;
-cl_int deviceExistForCLTest(
-    cl_platform_id platform, cl_dx9_media_adapter_type_khr media_adapters_type,
-    void *media_adapters, CResult &result,
-    TSharedHandleType sharedHandle /*default SHARED_HANDLE_ENABLED*/
-    cl_int _error;
-    cl_uint devicesAllNum = 0;
-    std::string sharedHandleStr =
-        (sharedHandle == SHARED_HANDLE_ENABLED) ? "yes" : "no";
-    std::string adapterStr;
-    AdapterToString(media_adapters_type, adapterStr);
-    _error = clGetDeviceIDsFromDX9MediaAdapterKHR(
-        platform, 1, &media_adapters_type, &media_adapters,
-    if (_error != CL_SUCCESS)
-    {
-        if (_error != CL_DEVICE_NOT_FOUND)
-        {
-            log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n",
-                      IGetErrorString(_error));
-            result.ResultSub(CResult::TEST_ERROR);
-        }
-        else
-        {
-            log_info("Skipping test case, device type is not supported by a "
-                     "device (adapter type: %s, shared handle: %s)\n",
-                     adapterStr.c_str(), sharedHandleStr.c_str());
-            result.ResultSub(CResult::TEST_NOTSUPPORTED);
-        }
-    }
-    return _error;
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.h b/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.h
deleted file mode 100644
index 56c0fc2..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/utils.h
+++ /dev/null
@@ -1,215 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef __UTILS_KHR_MEDIA_H
-#define __UTILS_KHR_MEDIA_H
-#include <string>
-#include <iostream>
-#include <memory>
-#include <vector>
-#include "wrappers.h"
-#include "CL/cl_dx9_media_sharing.h"
-#include "harness/typeWrappers.h"
-extern clGetDeviceIDsFromDX9MediaAdapterKHR_fn
-    clGetDeviceIDsFromDX9MediaAdapterKHR;
-extern clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR;
-extern clEnqueueAcquireDX9MediaSurfacesKHR_fn
-    clEnqueueAcquireDX9MediaSurfacesKHR;
-extern clEnqueueReleaseDX9MediaSurfacesKHR_fn
-    clEnqueueReleaseDX9MediaSurfacesKHR;
-extern cl_platform_id gPlatformIDdetected;
-extern cl_device_id gDeviceIDdetected;
-extern cl_device_type gDeviceTypeSelected;
-#define NL "\n"
-enum TSurfaceFormat
-enum TContextFuncType
-enum TSharedHandleType
-class CResult {
-    enum TTestResult
-    {
-        TEST_PASS,
-        TEST_FAIL,
-        TEST_ERROR,
-    };
-    CResult();
-    ~CResult();
-    void ResultSub(TTestResult result);
-    TTestResult ResultLast() const;
-    int Result() const;
-    TTestResult _result;
-    TTestResult _resultLast;
-void FunctionContextCreateToString(TContextFuncType contextCreateFunction,
-                                   std::string &contextFunction);
-void AdapterToString(cl_dx9_media_adapter_type_khr adapterType,
-                     std::string &adapter);
-AdapterTypeToContextInfo(cl_dx9_media_adapter_type_khr adapterType);
-// YUV utils
-void YUVGenerateNV12(std::vector<cl_uchar> &yuv, unsigned int width,
-                     unsigned int height, cl_uchar valueMin, cl_uchar valueMax,
-                     double valueAdd = 0.0);
-void YUVGenerateYV12(std::vector<cl_uchar> &yuv, unsigned int width,
-                     unsigned int height, cl_uchar valueMin, cl_uchar valueMax,
-                     double valueAdd = 0.0);
-bool YUVGenerate(TSurfaceFormat surfaceFormat, std::vector<cl_uchar> &yuv,
-                 unsigned int width, unsigned int height, cl_uchar valueMin,
-                 cl_uchar valueMax, double valueAdd = 0.0);
-bool YUVSurfaceSetNV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       const std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height);
-bool YUVSurfaceSetYV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       const std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height);
-bool YUVSurfaceSet(TSurfaceFormat surfaceFormat,
-                   std::auto_ptr<CSurfaceWrapper> &surface,
-                   const std::vector<cl_uchar> &yuv, unsigned int width,
-                   unsigned int height);
-bool YUVSurfaceGetNV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height);
-bool YUVSurfaceGetYV12(std::auto_ptr<CSurfaceWrapper> &surface,
-                       std::vector<cl_uchar> &yuv, unsigned int width,
-                       unsigned int height);
-bool YUVSurfaceGet(TSurfaceFormat surfaceFormat,
-                   std::auto_ptr<CSurfaceWrapper> &surface,
-                   std::vector<cl_uchar> &yuv, unsigned int width,
-                   unsigned int height);
-bool YUVCompareNV12(const std::vector<cl_uchar> &yuvTest,
-                    const std::vector<cl_uchar> &yuvRef, unsigned int width,
-                    unsigned int height);
-bool YUVCompareYV12(const std::vector<cl_uchar> &yuvTest,
-                    const std::vector<cl_uchar> &yuvRef, unsigned int width,
-                    unsigned int height);
-bool YUVCompare(TSurfaceFormat surfaceFormat,
-                const std::vector<cl_uchar> &yuvTest,
-                const std::vector<cl_uchar> &yuvRef, unsigned int width,
-                unsigned int height);
-// other types utils
-void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                  std::vector<float> &data, unsigned int width,
-                  unsigned int height, unsigned int channelNum,
-                  float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f);
-void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                  std::vector<cl_half> &data, unsigned int width,
-                  unsigned int height, unsigned int channelNum,
-                  float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f);
-void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                  std::vector<cl_uchar> &data, unsigned int width,
-                  unsigned int height, unsigned int channelNum,
-                  float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f);
-bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                 const std::vector<cl_float> &dataTest,
-                 const std::vector<cl_float> &dataExp, unsigned int width,
-                 unsigned int height, unsigned int channelNum);
-bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                 const std::vector<cl_half> &dataTest,
-                 const std::vector<cl_half> &dataExp, unsigned int width,
-                 unsigned int height, unsigned int channelNum);
-bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type,
-                 const std::vector<cl_uchar> &dataTest,
-                 const std::vector<cl_uchar> &dataExp, unsigned int width,
-                 unsigned int height, unsigned int channelNum);
-bool GetImageInfo(cl_mem object, cl_image_format formatExp,
-                  size_t elementSizeExp, size_t rowPitchExp,
-                  size_t slicePitchExp, size_t widthExp, size_t heightExp,
-                  size_t depthExp, unsigned int planeExp);
-bool GetMemObjInfo(cl_mem object, cl_dx9_media_adapter_type_khr adapterType,
-                   std::auto_ptr<CSurfaceWrapper> &surface,
-                   void *shareHandleExp);
-bool ImageInfoVerify(cl_dx9_media_adapter_type_khr adapterType,
-                     const std::vector<cl_mem> &memObjList, unsigned int width,
-                     unsigned int height,
-                     std::auto_ptr<CSurfaceWrapper> &surface,
-                     void *sharedHandle);
-bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType,
-                      const cl_image_format imageFormatCheck);
-unsigned int ChannelNum(TSurfaceFormat surfaceFormat);
-unsigned int PlanesNum(TSurfaceFormat surfaceFormat);
-#if defined(_WIN32)
-D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat);
-bool DeviceCreate(cl_dx9_media_adapter_type_khr adapterType,
-                  std::auto_ptr<CDeviceWrapper> &device);
-bool SurfaceFormatCheck(cl_dx9_media_adapter_type_khr adapterType,
-                        const CDeviceWrapper &device,
-                        TSurfaceFormat surfaceFormat);
-bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format);
-void SurfaceFormatToString(TSurfaceFormat surfaceFormat, std::string &str);
-bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType,
-                        unsigned int width, unsigned int height,
-                        TSurfaceFormat surfaceFormat, CDeviceWrapper &device,
-                        std::auto_ptr<CSurfaceWrapper> &surface,
-                        bool sharedHandle, void **objectSharedHandle);
-deviceExistForCLTest(cl_platform_id platform,
-                     cl_dx9_media_adapter_type_khr media_adapters_type,
-                     void *media_adapters, CResult &result,
-                     TSharedHandleType sharedHandle = SHARED_HANDLE_DISABLED);
-#endif // __UTILS_KHR_MEDIA_H
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.cpp
deleted file mode 100644
index e156584..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.cpp
+++ /dev/null
@@ -1,463 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "wrappers.h"
-#include "harness/errorHelpers.h"
-LPCTSTR CDeviceWrapper::WINDOW_TITLE = _T( "cl_khr_dx9_media_sharing" );
-const int CDeviceWrapper::WINDOW_WIDTH = 256;
-const int CDeviceWrapper::WINDOW_HEIGHT = 256;
-CDeviceWrapper::TAccelerationType CDeviceWrapper::accelerationType =
-    CDeviceWrapper::ACCELERATION_HW;
-#if defined(_WIN32)
-const unsigned int CDXVAWrapper::VIDEO_FPS = 60;
-#if defined(_WIN32)
-static LRESULT WINAPI WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
-    switch (msg)
-    {
-        case WM_DESTROY: PostQuitMessage(0); return 0;
-        case WM_PAINT: ValidateRect(hWnd, 0); return 0;
-        default: break;
-    }
-    return DefWindowProc(hWnd, msg, wParam, lParam);
-#if defined(_WIN32)
-    : _hInstance(NULL), _hWnd(NULL)
-void CDeviceWrapper::WindowInit()
-#if defined(_WIN32)
-    _hInstance = GetModuleHandle(NULL);
-    static WNDCLASSEX wc = {
-        sizeof(WNDCLASSEX), CS_CLASSDC, WndProc, 0L,   0L,
-        _hInstance,         NULL,       NULL,    NULL, NULL,
-        WINDOW_TITLE,       NULL
-    };
-    RegisterClassEx(&wc);
-                         WINDOW_WIDTH, WINDOW_HEIGHT, NULL, NULL, wc.hInstance,
-                         NULL);
-    if (!_hWnd)
-    {
-        log_error("Failed to create window");
-        return;
-    }
-    ShowWindow(_hWnd, SW_SHOWDEFAULT);
-    UpdateWindow(_hWnd);
-void CDeviceWrapper::WindowDestroy()
-#if defined(_WIN32)
-    if (_hWnd) DestroyWindow(_hWnd);
-    _hWnd = NULL;
-#if defined(_WIN32)
-HWND CDeviceWrapper::WindowHandle() const { return _hWnd; }
-int CDeviceWrapper::WindowWidth() const { return WINDOW_WIDTH; }
-int CDeviceWrapper::WindowHeight() const { return WINDOW_HEIGHT; }
-CDeviceWrapper::TAccelerationType CDeviceWrapper::AccelerationType()
-    return accelerationType;
-void CDeviceWrapper::AccelerationType(TAccelerationType accelerationTypeNew)
-    accelerationType = accelerationTypeNew;
-CDeviceWrapper::~CDeviceWrapper() { WindowDestroy(); }
-#if defined(_WIN32)
-    : _d3d9(NULL), _d3dDevice(NULL), _status(DEVICE_PASS), _adapterIdx(0),
-      _adapterFound(false)
-    WindowInit();
-    _d3d9 = Direct3DCreate9(D3D_SDK_VERSION);
-    if (!_d3d9)
-    {
-        log_error("Direct3DCreate9 failed\n");
-        _status = DEVICE_FAIL;
-    }
-    Destroy();
-    if (_d3d9) _d3d9->Release();
-    _d3d9 = 0;
-void CD3D9Wrapper::Destroy()
-    if (_d3dDevice) _d3dDevice->Release();
-    _d3dDevice = 0;
-cl_int CD3D9Wrapper::Init()
-    if (!WindowHandle())
-    {
-        log_error("D3D9: Window is not created\n");
-        _status = DEVICE_FAIL;
-        return DEVICE_FAIL;
-    }
-    if (!_d3d9 || DEVICE_PASS != _status || !_adapterFound) return false;
-    _d3d9->GetAdapterDisplayMode(_adapterIdx - 1, &_d3ddm);
-    ZeroMemory(&d3dParams, sizeof(d3dParams));
-    d3dParams.Windowed = TRUE;
-    d3dParams.BackBufferCount = 1;
-    d3dParams.SwapEffect = D3DSWAPEFFECT_DISCARD;
-    d3dParams.hDeviceWindow = WindowHandle();
-    d3dParams.BackBufferWidth = WindowWidth();
-    d3dParams.BackBufferHeight = WindowHeight();
-    d3dParams.BackBufferFormat = _d3ddm.Format;
-    DWORD processingType = (AccelerationType() == ACCELERATION_HW)
-    if (FAILED(_d3d9->CreateDevice(_adapterIdx - 1, D3DDEVTYPE_HAL,
-                                   WindowHandle(), processingType, &d3dParams,
-                                   &_d3dDevice)))
-    {
-        log_error("CreateDevice failed\n");
-        _status = DEVICE_FAIL;
-        return DEVICE_FAIL;
-    }
-    _d3dDevice->BeginScene();
-    _d3dDevice->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);
-    _d3dDevice->EndScene();
-    return true;
-void *CD3D9Wrapper::D3D() const { return _d3d9; }
-void *CD3D9Wrapper::Device() const { return _d3dDevice; }
-D3DFORMAT CD3D9Wrapper::Format() { return _d3ddm.Format; }
-D3DADAPTER_IDENTIFIER9 CD3D9Wrapper::Adapter() { return _adapter; }
-TDeviceStatus CD3D9Wrapper::Status() const { return _status; }
-bool CD3D9Wrapper::AdapterNext()
-    if (DEVICE_PASS != _status) return false;
-    _adapterFound = false;
-    for (; _adapterIdx < _d3d9->GetAdapterCount();)
-    {
-        ++_adapterIdx;
-        D3DCAPS9 caps;
-        if (FAILED(
-                _d3d9->GetDeviceCaps(_adapterIdx - 1, D3DDEVTYPE_HAL, &caps)))
-            continue;
-        if (FAILED(_d3d9->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter)))
-        {
-            log_error("D3D9: GetAdapterIdentifier failed\n");
-            _status = DEVICE_FAIL;
-            return false;
-        }
-        _adapterFound = true;
-        Destroy();
-        if (!Init())
-        {
-            _status = DEVICE_FAIL;
-            _adapterFound = false;
-        }
-        break;
-    }
-    return _adapterFound;
-unsigned int CD3D9Wrapper::AdapterIdx() const { return _adapterIdx - 1; }
-    : _d3d9Ex(NULL), _d3dDeviceEx(NULL), _status(DEVICE_PASS), _adapterIdx(0),
-      _adapterFound(false)
-    WindowInit();
-    HRESULT result = Direct3DCreate9Ex(D3D_SDK_VERSION, &_d3d9Ex);
-    if (FAILED(result) || !_d3d9Ex)
-    {
-        log_error("Direct3DCreate9Ex failed\n");
-        _status = DEVICE_FAIL;
-    }
-    Destroy();
-    if (_d3d9Ex) _d3d9Ex->Release();
-    _d3d9Ex = 0;
-void *CD3D9ExWrapper::D3D() const { return _d3d9Ex; }
-void *CD3D9ExWrapper::Device() const { return _d3dDeviceEx; }
-D3DFORMAT CD3D9ExWrapper::Format() { return _d3ddmEx.Format; }
-D3DADAPTER_IDENTIFIER9 CD3D9ExWrapper::Adapter() { return _adapter; }
-cl_int CD3D9ExWrapper::Init()
-    if (!WindowHandle())
-    {
-        log_error("D3D9EX: Window is not created\n");
-        _status = DEVICE_FAIL;
-        return DEVICE_FAIL;
-    }
-    if (!_d3d9Ex || DEVICE_FAIL == _status || !_adapterFound)
-        return DEVICE_FAIL;
-    RECT rect;
-    GetClientRect(WindowHandle(), &rect);
-    ZeroMemory(&d3dParams, sizeof(d3dParams));
-    d3dParams.Windowed = TRUE;
-    d3dParams.SwapEffect = D3DSWAPEFFECT_FLIP;
-    d3dParams.BackBufferFormat = D3DFMT_X8R8G8B8;
-    d3dParams.BackBufferWidth = WindowWidth();
-    d3dParams.BackBufferHeight = WindowHeight();
-    d3dParams.BackBufferCount = 1;
-    d3dParams.hDeviceWindow = WindowHandle();
-    DWORD processingType = (AccelerationType() == ACCELERATION_HW)
-    if (FAILED(_d3d9Ex->CreateDeviceEx(_adapterIdx - 1, D3DDEVTYPE_HAL,
-                                       WindowHandle(), processingType,
-                                       &d3dParams, NULL, &_d3dDeviceEx)))
-    {
-        log_error("CreateDeviceEx failed\n");
-        _status = DEVICE_FAIL;
-        return DEVICE_FAIL;
-    }
-    _d3dDeviceEx->BeginScene();
-    _d3dDeviceEx->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);
-    _d3dDeviceEx->EndScene();
-    return DEVICE_PASS;
-void CD3D9ExWrapper::Destroy()
-    if (_d3dDeviceEx) _d3dDeviceEx->Release();
-    _d3dDeviceEx = 0;
-TDeviceStatus CD3D9ExWrapper::Status() const { return _status; }
-bool CD3D9ExWrapper::AdapterNext()
-    if (DEVICE_FAIL == _status) return false;
-    _adapterFound = false;
-    for (; _adapterIdx < _d3d9Ex->GetAdapterCount();)
-    {
-        ++_adapterIdx;
-        D3DCAPS9 caps;
-        if (FAILED(
-                _d3d9Ex->GetDeviceCaps(_adapterIdx - 1, D3DDEVTYPE_HAL, &caps)))
-            continue;
-        if (FAILED(
-                _d3d9Ex->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter)))
-        {
-            log_error("D3D9EX: GetAdapterIdentifier failed\n");
-            _status = DEVICE_FAIL;
-            return false;
-        }
-        _adapterFound = true;
-        Destroy();
-        if (!Init())
-        {
-            _status = DEVICE_FAIL;
-            _adapterFound = _status;
-        }
-        break;
-    }
-    return _adapterFound;
-unsigned int CD3D9ExWrapper::AdapterIdx() const { return _adapterIdx - 1; }
-    : _dxvaDevice(NULL), _status(DEVICE_PASS), _adapterFound(false)
-    _status = _d3d9.Status();
-CDXVAWrapper::~CDXVAWrapper() { DXVAHDDestroy(); }
-void *CDXVAWrapper::Device() const { return _dxvaDevice; }
-TDeviceStatus CDXVAWrapper::Status() const
-    if (_status == DEVICE_FAIL || _d3d9.Status() == DEVICE_FAIL)
-        return DEVICE_FAIL;
-    else if (_status == DEVICE_NOTSUPPORTED
-             || _d3d9.Status() == DEVICE_NOTSUPPORTED)
-        return DEVICE_NOTSUPPORTED;
-    else
-        return DEVICE_PASS;
-bool CDXVAWrapper::AdapterNext()
-    if (DEVICE_PASS != _status) return false;
-    _adapterFound = _d3d9.AdapterNext();
-    _status = _d3d9.Status();
-    if (DEVICE_PASS != _status)
-    {
-        _adapterFound = false;
-        return false;
-    }
-    if (!_adapterFound) return false;
-    DXVAHDDestroy();
-    _status = DXVAHDInit();
-    if (DEVICE_PASS != _status)
-    {
-        _adapterFound = false;
-        return false;
-    }
-    return true;
-TDeviceStatus CDXVAWrapper::DXVAHDInit()
-    if ((_status == DEVICE_FAIL) || (_d3d9.Status() == DEVICE_FAIL)
-        || !_adapterFound)
-        return DEVICE_FAIL;
-    DXVAHD_RATIONAL fps = { VIDEO_FPS, 1 };
-    desc.InputFrameRate = fps;
-    desc.InputWidth = WindowWidth();
-    desc.InputHeight = WindowHeight();
-    desc.OutputFrameRate = fps;
-    desc.OutputWidth = WindowWidth();
-    desc.OutputHeight = WindowHeight();
-    _status = DEVICE_FAIL;
-    return DEVICE_FAIL;
-    HRESULT hr = DXVAHD_CreateDevice(
-        static_cast<IDirect3DDevice9Ex *>(_d3d9.Device()), &desc,
-    if (FAILED(hr))
-    {
-        if (hr == E_NOINTERFACE)
-        {
-            log_error(
-                "DXVAHD_CreateDevice skipped due to no supported devices!\n");
-            _status = DEVICE_NOTSUPPORTED;
-        }
-        else
-        {
-            log_error("DXVAHD_CreateDevice failed\n");
-            _status = DEVICE_FAIL;
-        }
-    }
-    return _status;
-void CDXVAWrapper::DXVAHDDestroy()
-    if (_dxvaDevice) _dxvaDevice->Release();
-    _dxvaDevice = 0;
-void *CDXVAWrapper::D3D() const { return _d3d9.D3D(); }
-unsigned int CDXVAWrapper::AdapterIdx() const { return _d3d9.AdapterIdx(); }
-const CD3D9ExWrapper &CDXVAWrapper::D3D9() const { return _d3d9; }
-CD3D9SurfaceWrapper::CD3D9SurfaceWrapper(): mMem(NULL) {}
-CD3D9SurfaceWrapper::CD3D9SurfaceWrapper(IDirect3DSurface9 *mem): mMem(mem) {}
-    if (mMem != NULL) mMem->Release();
-    mMem = NULL;
-CSurfaceWrapper::CSurfaceWrapper() {}
-CSurfaceWrapper::~CSurfaceWrapper() {}
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.h b/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.h
deleted file mode 100644
index e3a7c6d..0000000
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/wrappers.h
+++ /dev/null
@@ -1,195 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef __WRAPPERS_H
-#define __WRAPPERS_H
-#if defined(_WIN32)
-#include <d3d9.h>
-#if defined(__MINGW32__)
-#include <rpcsal.h>
-typedef unsigned char UINT8;
-#define __out
-#define __in
-#define __inout
-#define __out_bcount(size)
-#define __out_bcount_opt(size)
-#define __in_opt
-#define __in_ecount(size)
-#define __in_ecount_opt(size)
-#define __out_opt
-#define __out_ecount(size)
-#define __out_ecount_opt(size)
-#define __in_bcount_opt(size)
-#define __inout_opt
-#define __inout_bcount(size)
-#define __in_bcount(size)
-#define __deref_out
-#include <dxvahd.h>
-#include <tchar.h>
-enum TDeviceStatus
-class CDeviceWrapper {
-    enum TAccelerationType
-    {
-    };
-    CDeviceWrapper();
-    virtual ~CDeviceWrapper();
-    virtual bool AdapterNext() = 0;
-    virtual unsigned int AdapterIdx() const = 0;
-    virtual void *Device() const = 0;
-    virtual TDeviceStatus Status() const = 0;
-    virtual void *D3D() const = 0;
-#if defined(_WIN32)
-    HWND WindowHandle() const;
-    int WindowWidth() const;
-    int WindowHeight() const;
-    void WindowInit();
-    static TAccelerationType AccelerationType();
-    static void AccelerationType(TAccelerationType accelerationTypeNew);
-    static const int WINDOW_WIDTH;
-    static const int WINDOW_HEIGHT;
-    static TAccelerationType accelerationType;
-#if defined(_WIN32)
-    HMODULE _hInstance;
-    HWND _hWnd;
-    void WindowDestroy();
-class CSurfaceWrapper {
-    CSurfaceWrapper();
-    virtual ~CSurfaceWrapper();
-#if defined(_WIN32)
-// windows specific wrappers
-class CD3D9Wrapper : public CDeviceWrapper {
-    CD3D9Wrapper();
-    ~CD3D9Wrapper();
-    virtual bool AdapterNext();
-    virtual unsigned int AdapterIdx() const;
-    virtual void *Device() const;
-    virtual TDeviceStatus Status() const;
-    virtual void *D3D() const;
-    LPDIRECT3D9 _d3d9;
-    LPDIRECT3DDEVICE9 _d3dDevice;
-    D3DDISPLAYMODE _d3ddm;
-    TDeviceStatus _status;
-    unsigned int _adapterIdx;
-    bool _adapterFound;
-    D3DFORMAT Format();
-    int Init();
-    void Destroy();
-class CD3D9ExWrapper : public CDeviceWrapper {
-    CD3D9ExWrapper();
-    ~CD3D9ExWrapper();
-    virtual bool AdapterNext();
-    virtual unsigned int AdapterIdx() const;
-    virtual void *Device() const;
-    virtual TDeviceStatus Status() const;
-    virtual void *D3D() const;
-    LPDIRECT3D9EX _d3d9Ex;
-    LPDIRECT3DDEVICE9EX _d3dDeviceEx;
-    TDeviceStatus _status;
-    unsigned int _adapterIdx;
-    bool _adapterFound;
-    D3DFORMAT Format();
-    int Init();
-    void Destroy();
-class CDXVAWrapper : public CDeviceWrapper {
-    CDXVAWrapper();
-    ~CDXVAWrapper();
-    virtual bool AdapterNext();
-    virtual unsigned int AdapterIdx() const;
-    virtual void *Device() const;
-    virtual TDeviceStatus Status() const;
-    virtual void *D3D() const;
-    const CD3D9ExWrapper &D3D9() const;
-    CD3D9ExWrapper _d3d9;
-    IDXVAHD_Device *_dxvaDevice;
-    TDeviceStatus _status;
-    bool _adapterFound;
-    static const D3DFORMAT VIDEO_FORMAT;
-    static const unsigned int VIDEO_FPS;
-    TDeviceStatus DXVAHDInit();
-    void DXVAHDDestroy();
-class CD3D9SurfaceWrapper : public CSurfaceWrapper {
-    CD3D9SurfaceWrapper();
-    CD3D9SurfaceWrapper(IDirect3DSurface9 *mem);
-    ~CD3D9SurfaceWrapper();
-    operator IDirect3DSurface9 *() { return mMem; }
-    IDirect3DSurface9 **operator&() { return &mMem; }
-    IDirect3DSurface9 *operator->() const { return mMem; }
-    IDirect3DSurface9 *mMem;
-#endif // __D3D_WRAPPERS
diff --git a/test_conformance/generic_address_space/basic_tests.cpp b/test_conformance/generic_address_space/basic_tests.cpp
index b2e745c..0b81564 100644
--- a/test_conformance/generic_address_space/basic_tests.cpp
+++ b/test_conformance/generic_address_space/basic_tests.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -39,9 +39,7 @@
         const char *srcPtr = src.c_str();
-        if (create_single_kernel_helper(context, &program, &kernel, 1, &srcPtr,
-                                        "testKernel"))
-        {
+        if (create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &srcPtr, "testKernel", "-cl-std=CL2.0")) {
             log_error("create_single_kernel_helper failed");
             return -1;
diff --git a/test_conformance/generic_address_space/main.cpp b/test_conformance/generic_address_space/main.cpp
index 0114758..12fa4a6 100644
--- a/test_conformance/generic_address_space/main.cpp
+++ b/test_conformance/generic_address_space/main.cpp
@@ -75,9 +75,7 @@
     if (version < expected_min_version)
-        version_expected_info("Test", "OpenCL",
-                              expected_min_version.to_string().c_str(),
-                              version.to_string().c_str());
+        version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
         return TEST_SKIP;
diff --git a/test_conformance/generic_address_space/stress_tests.cpp b/test_conformance/generic_address_space/stress_tests.cpp
index 7193e69..4f94a5d 100644
--- a/test_conformance/generic_address_space/stress_tests.cpp
+++ b/test_conformance/generic_address_space/stress_tests.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -41,9 +41,7 @@
         const char *srcPtr = src.c_str();
-        if (create_single_kernel_helper(context, &program, &kernel, 1, &srcPtr,
-                                        "testKernel"))
-        {
+        if (create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &srcPtr, "testKernel", "-cl-std=CL2.0")) {
             log_error("create_single_kernel_helper failed");
             return -1;
diff --git a/test_conformance/geometrics/main.cpp b/test_conformance/geometrics/main.cpp
index 45f2b06..038999d 100644
--- a/test_conformance/geometrics/main.cpp
+++ b/test_conformance/geometrics/main.cpp
@@ -38,6 +38,6 @@
 int main(int argc, const char *argv[])
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
diff --git a/test_conformance/geometrics/test_geometrics.cpp b/test_conformance/geometrics/test_geometrics.cpp
index e305026..2fcf31c 100644
--- a/test_conformance/geometrics/test_geometrics.cpp
+++ b/test_conformance/geometrics/test_geometrics.cpp
@@ -188,25 +188,19 @@
         fillWithTrickyNumbers( inDataA, inDataB, vecsize );
-        streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                    sizeof(cl_float) * vecsize * TEST_SIZE,
-                                    inDataA, NULL);
+        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * vecsize * TEST_SIZE, inDataA, NULL);
         if( streams[0] == NULL )
             log_error("ERROR: Creating input array A failed!\n");
             return -1;
-        streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                    sizeof(cl_float) * vecsize * TEST_SIZE,
-                                    inDataB, NULL);
+        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * vecsize * TEST_SIZE, inDataB, NULL);
         if( streams[1] == NULL )
             log_error("ERROR: Creating input array B failed!\n");
             return -1;
-        streams[2] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * vecsize * TEST_SIZE, NULL, NULL);
+        streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * vecsize * TEST_SIZE, NULL, NULL);
         if( streams[2] == NULL )
             log_error("ERROR: Creating output array failed!\n");
@@ -359,24 +353,19 @@
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_float) * vecSize * TEST_SIZE, inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * vecSize * TEST_SIZE, inDataA, NULL);
     if( streams[0] == NULL )
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_float) * vecSize * TEST_SIZE, inDataB, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * vecSize * TEST_SIZE, inDataB, NULL);
     if( streams[1] == NULL )
         log_error("ERROR: Creating input array B failed!\n");
         return -1;
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * TEST_SIZE, NULL, NULL);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * TEST_SIZE, NULL, NULL);
     if( streams[2] == NULL )
         log_error("ERROR: Creating output array failed!\n");
@@ -671,15 +660,14 @@
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_float) * vecSize * TEST_SIZE, inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
+                                sizeof(cl_float) * vecSize * TEST_SIZE, inDataA, NULL);
     if( streams[0] == NULL )
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
                                 sizeof(cl_float) * TEST_SIZE, NULL, NULL);
     if( streams[1] == NULL )
@@ -884,17 +872,13 @@
             inDataA[i] = any_float(d);
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_float) * vecSize * TEST_SIZE, inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * vecSize* TEST_SIZE, inDataA, NULL);
     if( streams[0] == NULL )
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_float) * vecSize * TEST_SIZE, NULL, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * vecSize  * TEST_SIZE, NULL, NULL);
     if( streams[1] == NULL )
         log_error("ERROR: Creating output array failed!\n");
diff --git a/test_conformance/geometrics/test_geometrics_double.cpp b/test_conformance/geometrics/test_geometrics_double.cpp
index 222017e..7dec751 100644
--- a/test_conformance/geometrics/test_geometrics_double.cpp
+++ b/test_conformance/geometrics/test_geometrics_double.cpp
@@ -210,22 +210,19 @@
         fillWithTrickyNumbers_double( inDataA, inDataB, vecsize );
-        streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, bufSize,
-                                    inDataA, NULL);
+        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), bufSize, inDataA, NULL);
         if( streams[0] == NULL )
             log_error("ERROR: Creating input array A failed!\n");
             return -1;
-        streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, bufSize,
-                                    inDataB, NULL);
+        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), bufSize, inDataB, NULL);
         if( streams[1] == NULL )
             log_error("ERROR: Creating input array B failed!\n");
             return -1;
-        streams[2] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE, bufSize, NULL, NULL);
+        streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), bufSize, NULL, NULL);
         if( streams[2] == NULL )
             log_error("ERROR: Creating output array failed!\n");
@@ -327,24 +324,19 @@
     fillWithTrickyNumbers_double( inDataA, inDataB, vecSize );
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_double) * vecSize * TEST_SIZE, inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_double) * vecSize * TEST_SIZE, inDataA, NULL);
     if( streams[0] == NULL )
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_double) * vecSize * TEST_SIZE, inDataB, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_double) * vecSize * TEST_SIZE, inDataB, NULL);
     if( streams[1] == NULL )
         log_error("ERROR: Creating input array B failed!\n");
         return -1;
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * TEST_SIZE, NULL, NULL);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * TEST_SIZE, NULL, NULL);
     if( streams[2] == NULL )
         log_error("ERROR: Creating output array failed!\n");
@@ -563,16 +555,13 @@
     fillWithTrickyNumbers_double( inDataA, NULL, vecSize );
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_double) * vecSize * TEST_SIZE, inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_double) * vecSize * TEST_SIZE, inDataA, NULL);
     if( streams[0] == NULL )
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * TEST_SIZE, NULL, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * TEST_SIZE, NULL, NULL);
     if( streams[1] == NULL )
         log_error("ERROR: Creating output array failed!\n");
@@ -770,17 +759,13 @@
         inDataA[ i ] = any_double(d);
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_double) * vecSize * TEST_SIZE, inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_double) * vecSize * TEST_SIZE, inDataA, NULL);
     if( streams[0] == NULL )
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_double) * vecSize * TEST_SIZE, NULL, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * vecSize * TEST_SIZE, NULL, NULL);
     if( streams[1] == NULL )
         log_error("ERROR: Creating output array failed!\n");
diff --git a/test_conformance/gl/helpers.cpp b/test_conformance/gl/helpers.cpp
index 16441a4..864059b 100644
--- a/test_conformance/gl/helpers.cpp
+++ b/test_conformance/gl/helpers.cpp
@@ -368,7 +368,7 @@
                 cl_float *outData = new cl_float[ numPixels * channelNum ];
                 for( size_t i = 0; i < numPixels * channelNum; i++ )
-                    outData[i] = cl_half_to_float(src[i]);
+                    outData[ i ] = convert_half_to_float(src[ i ]);
                 return (char *)outData;
diff --git a/test_conformance/gl/test_buffers.cpp b/test_conformance/gl/test_buffers.cpp
index 35f01ee..f11590f 100644
--- a/test_conformance/gl/test_buffers.cpp
+++ b/test_conformance/gl/test_buffers.cpp
@@ -184,7 +184,7 @@
     glBufferData( GL_ARRAY_BUFFER, bufferSize, outDataGL, GL_STATIC_DRAW );
     glBindBuffer( GL_ARRAY_BUFFER, 0 );
-    glFinish();
+    glFlush();
     /* Generate some streams. The first and last ones are GL, middle one just vanilla CL */
diff --git a/test_conformance/gl/test_fence_sync.cpp b/test_conformance/gl/test_fence_sync.cpp
index 00bf2cc..37202ce 100644
--- a/test_conformance/gl/test_fence_sync.cpp
+++ b/test_conformance/gl/test_fence_sync.cpp
@@ -74,7 +74,7 @@
     glGetInteger64vFunc = (glGetInteger64vPtr)glutGetProcAddress( "glGetInteger64v" );
     glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress( "glGetSynciv" );
-#ifndef GL_ARB_sync
 #define GL_MAX_SERVER_WAIT_TIMEOUT        0x9111
 #define GL_OBJECT_TYPE            0x9112
@@ -97,7 +97,6 @@
 #define GL_TIMEOUT_EXPIRED            0x911B
 #define GL_CONDITION_SATISFIED        0x911C
 #define GL_WAIT_FAILED            0x911D
 #define USING_ARB_sync 1
@@ -286,7 +285,7 @@
     virtual void * IRun( void )
         cl_int error = run_cl_kernel( mKernel, mQueue, mStream0, mStream1, mRowIdx, mFenceEvent, mNumThreads );
-        return (void *)(uintptr_t)error;
+        return (void *)error;
diff --git a/test_conformance/gl/test_image_methods.cpp b/test_conformance/gl/test_image_methods.cpp
index 07f5b65..0d0e5c7 100644
--- a/test_conformance/gl/test_image_methods.cpp
+++ b/test_conformance/gl/test_image_methods.cpp
@@ -19,7 +19,7 @@
 using namespace std;
-struct image_kernel_data
+typedef struct image_kernel_data
     cl_int width;
     cl_int height;
@@ -277,8 +277,7 @@
     test_error( error, "Unable to create kernel to test against" );
     // Create an output buffer
-    outDataBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                   sizeof(outKernelData), NULL, &error);
+    outDataBuffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outKernelData ), NULL, &error );
     test_error( error, "Unable to create output buffer" );
     // Set up arguments and run
@@ -287,10 +286,10 @@
     error = clSetKernelArg( kernel, 1, sizeof( outDataBuffer ), &outDataBuffer );
     test_error( error, "Unable to set kernel argument" );
-    // Finish and Acquire.
-    glFinish();
-    error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &image, 0, NULL, NULL);
-    test_error(error, "Unable to acquire GL obejcts");
+  // Flush and Acquire.
+  glFlush();
+  error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &image, 0, NULL, NULL);
+  test_error( error, "Unable to acquire GL obejcts");
     size_t threads[1] = { 1 }, localThreads[1] = { 1 };
diff --git a/test_conformance/gles/CMakeLists.txt b/test_conformance/gles/CMakeLists.txt
index c76fe51..f367490 100644
--- a/test_conformance/gles/CMakeLists.txt
+++ b/test_conformance/gles/CMakeLists.txt
@@ -15,6 +15,11 @@
+    list(APPEND CLConform_LIBRARIES GLESv2)
+    list(APPEND CLConform_LIBRARIES libEGL libGLESv2 )
diff --git a/test_conformance/gles/main.cpp b/test_conformance/gles/main.cpp
index 644fa63..d0c12c9 100644
--- a/test_conformance/gles/main.cpp
+++ b/test_conformance/gles/main.cpp
@@ -62,8 +62,8 @@
 TEST_FN_REDIRECTOR( renderbuffer_write )
 TEST_FN_REDIRECTOR( renderbuffer_getinfo )
-#ifdef GL_ES_VERSION_3_0
+#ifndef GL_ES_VERSION_2_0
+TEST_FN_REDIRECTOR( test_fence_sync )
 test_definition test_list[] = {
@@ -82,17 +82,14 @@
     TEST_FN_REDIRECT( renderbuffer_getinfo )
-#ifdef GL_ES_VERSION_3_0
+#ifndef GL_ES_VERSION_2_0
 test_definition test_list32[] = {
     TEST_FN_REDIRECT( fence_sync )
 const int test_num = ARRAY_SIZE( test_list );
-#ifdef GL_ES_VERSION_3_0
 const int test_num32 = ARRAY_SIZE( test_list32 );
 int main(int argc, const char *argv[])
@@ -116,15 +113,12 @@
         for( int i = 0; i < test_num; i++ )
             log_info( "\t%s\n", test_list[i].name );
-#ifdef GL_ES_VERSION_3_0
         log_info( "Available 3.2 tests:\n" );
         for( int i = 0; i < test_num32; i++ )
             log_info( "\t%s\n", test_list32[i].name );
-        log_info("Note: Any 3.2 test names must follow 2.1 test names on the "
-                 "command line.");
-        log_info("Use environment variables to specify desired device.");
+    log_info( "Note: Any 3.2 test names must follow 2.1 test names on the command line." );
+    log_info( "Use environment variables to specify desired device." );
         return 0;
@@ -147,14 +141,12 @@
   // Check to see if any 2.x or 3.2 test names were specified on the command line.
   unsigned first_32_testname = 0;
-#ifdef GL_ES_VERSION_3_0
   for (int j=1; (j<argc) && (!first_32_testname); ++j)
     for (int i = 0; i < test_num32; ++i)
       if (strcmp(test_list32[i].name, argv[j]) == 0 ) {
         first_32_testname = j;
   // Create the environment for the test.
     GLEnvironment *glEnv = GLEnvironment::Instance();
@@ -330,7 +322,7 @@
           error = -1;
           goto cleanup;
-#ifndef GLES3
+#ifdef GL_ES_VERSION_2_0
         log_info("Cannot test OpenGL 3.2! This test was built for OpenGL ES 2.0\n");
         error = -1;
         goto cleanup;
diff --git a/test_conformance/gles/setup_egl.cpp b/test_conformance/gles/setup_egl.cpp
index fe0f8ca..6bb53cf 100644
--- a/test_conformance/gles/setup_egl.cpp
+++ b/test_conformance/gles/setup_egl.cpp
@@ -112,17 +112,11 @@
         size_t dev_size;
         cl_int status;
-        clGetGLContextInfoKHR_fn GetGLContextInfo =
-            (clGetGLContextInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(
-                _platform, "clGetGLContextInfoKHR");
-        if (GetGLContextInfo == NULL)
-        {
-            print_error(status, "clGetGLContextInfoKHR failed");
-            return NULL;
-        }
-        status = GetGLContextInfo(properties, CL_DEVICES_FOR_GL_CONTEXT_KHR,
-                                  sizeof(devices), devices, &dev_size);
+        status = clGetGLContextInfoKHR(properties,
+                                       CL_DEVICES_FOR_GL_CONTEXT_KHR,
+                                       sizeof(devices),
+                                       devices,
+                                       &dev_size);
         if (status != CL_SUCCESS) {
             print_error(status, "clGetGLContextInfoKHR failed");
             return NULL;
@@ -130,9 +124,11 @@
         dev_size /= sizeof(cl_device_id);
         log_info("GL _context supports %d compute devices\n", dev_size);
-        status =
-            GetGLContextInfo(properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
-                             sizeof(devices), devices, &dev_size);
+        status = clGetGLContextInfoKHR(properties,
+                                       CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
+                                       sizeof(devices),
+                                       devices,
+                                       &dev_size);
         if (status != CL_SUCCESS) {
             print_error(status, "clGetGLContextInfoKHR failed");
             return NULL;
@@ -168,12 +164,9 @@
         // Check all devices, search for one that supports cl_khr_gl_sharing
         for (int i=0; i<(int)num_of_devices; i++) {
-            if (!is_extension_available(devices[i], "cl_khr_gl_sharing"))
-            {
+            if (!is_extension_available(devices[i], "cl_khr_gl_sharing")) {
                 log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
-            }
-            else
-            {
+            } else {
                 log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
diff --git a/test_conformance/gles/test_fence_sync.cpp b/test_conformance/gles/test_fence_sync.cpp
index 0af91a4..7683b9c 100644
--- a/test_conformance/gles/test_fence_sync.cpp
+++ b/test_conformance/gles/test_fence_sync.cpp
@@ -160,7 +160,7 @@
     glAttachShader(program, vpShader);
     GLuint fpShader;
-    char *fpstr = (char *)malloc(sizeof(fragmentshader));
+    char* fpstr = (char*)malloc(strlen(fragmentshader) + 1); // +1: strcpy below also writes the terminating NUL
     strcpy(fpstr, fragmentshader);
     fpShader = glCreateShader(GL_FRAGMENT_SHADER);
     glShaderSource(fpShader, 1, (const GLchar **)&fpstr, NULL);
@@ -297,7 +297,7 @@
     virtual void * IRun( void )
         cl_int error = run_cl_kernel( mKernel, mQueue, mStream0, mStream1, mRowIdx, mFenceEvent, mNumThreads );
-        return (void *)(intptr_t)error;
+        return (void *)(intptr_t)error; // widen to pointer width before the int-to-pointer cast
diff --git a/test_conformance/half/Test_vLoadHalf.cpp b/test_conformance/half/Test_vLoadHalf.cpp
index 52867c2..7bc756d 100644
--- a/test_conformance/half/Test_vLoadHalf.cpp
+++ b/test_conformance/half/Test_vLoadHalf.cpp
@@ -20,7 +20,44 @@
 #include "cl_utils.h"
 #include "tests.h"
-#include <CL/cl_half.h>
+static inline float half2float( cl_ushort us )
+    uint32_t u = us;
+    uint32_t sign = (u << 16) & 0x80000000;
+    int32_t exponent = (u & 0x7c00) >> 10;
+    uint32_t mantissa = (u & 0x03ff) << 13;
+    union{ unsigned int u; float f;}uu;
+    if( exponent == 0 )
+    {
+        if( mantissa == 0 )
+            return sign ? -0.0f : 0.0f;
+        int shift = __builtin_clz( mantissa ) - 8;
+        exponent -= shift-1;
+        mantissa <<= shift;
+        mantissa &= 0x007fffff;
+    }
+    else
+        if( exponent == 31)
+        {
+            uu.u = mantissa | sign;
+            if( mantissa )
+                uu.u |= 0x7fc00000;
+            else
+                uu.u |= 0x7f800000;
+            return uu.f;
+        }
+    exponent += 127 - 15;
+    exponent <<= 23;
+    exponent |= mantissa;
+    uu.u = exponent | sign;
+    return uu.f;
 int Test_vLoadHalf_private( cl_device_id device, bool aligned )
@@ -170,38 +207,17 @@
         const char *source_local2[] = {
-            "#define VECTOR_LEN (",
-            vector_size_names[vectorSize],
-            "/",
-            align_divisors[vectorSize],
-            ")\n"
-            "#define ALIGN_TYPE ",
-            align_types[vectorSize],
-            "\n"
-            "__kernel void test( const __global half *p, __global float",
-            vector_size_name_extensions[vectorSize],
-            " *f )\n"
+            "__kernel void test( const __global half *p, __global float", vector_size_name_extensions[vectorSize], " *f )\n"
-            "   __local uchar data[",
-            local_buf_size,
-            "/",
-            align_divisors[vectorSize],
-            "*sizeof(ALIGN_TYPE)] ",
-            "__attribute__((aligned(sizeof(ALIGN_TYPE))));\n"
+            "   __local ", align_types[vectorSize], " data[", local_buf_size, "/", align_divisors[vectorSize], "];\n"
             "   __local half* hdata_p = (__local half*) data;\n"
-            "   __global ALIGN_TYPE* i_p = (__global ALIGN_TYPE*)p;\n"
+            "   __global ", align_types[vectorSize], "* i_p = (__global ", align_types[vectorSize],"*)p;\n"
             "   size_t i = get_global_id(0);\n"
             "   size_t lid = get_local_id(0);\n"
             "   int k;\n"
-            "   for (k=0; k<VECTOR_LEN; k++)\n"
-            "     *(__local ",
-            "ALIGN_TYPE*)&(data[(lid*VECTOR_LEN+k)*sizeof(ALIGN_TYPE)]) = ",
-            "i_p[i*VECTOR_LEN+k];\n"
-            "   f[i] = vload",
-            aligned ? "a" : "",
-            "_half",
-            vector_size_name_extensions[vectorSize],
-            "( lid, hdata_p );\n"
+            "   for (k=0; k<",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"; k++)\n"
+            "     data[lid*",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"+k] = i_p[i*",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"+k];\n"
+            "   f[i] = vload", aligned ? "a" : "", "_half",vector_size_name_extensions[vectorSize],"( lid, hdata_p );\n"
@@ -466,7 +482,8 @@
         //create the reference result
         const unsigned short *s = (const unsigned short *)gIn_half;
         float *d = (float *)gOut_single_reference;
-        for (j = 0; j < count; j++) d[j] = cl_half_to_float(s[j]);
+        for( j = 0; j < count; j++ )
+            d[j] = half2float( s[j] );
         //Check the vector lengths
         for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
@@ -527,7 +544,7 @@
                             error = -1;
-                            goto exit;
+                            break; // goto exit;
diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp
index c3a328a..2b24a68 100644
--- a/test_conformance/half/Test_vStoreHalf.cpp
+++ b/test_conformance/half/Test_vStoreHalf.cpp
@@ -21,8 +21,6 @@
 #include "cl_utils.h"
 #include "tests.h"
-#include <CL/cl_half.h>
 typedef struct ComputeReferenceInfoF_
     float *x;
@@ -210,44 +208,406 @@
     return ret;
-static cl_half float2half_rte(float f)
+static cl_ushort float2half_rte( float f );
+static cl_ushort float2half_rtz( float f );
+static cl_ushort float2half_rtp( float f );
+static cl_ushort float2half_rtn( float f );
+static cl_ushort double2half_rte( double f );
+static cl_ushort double2half_rtz( double f );
+static cl_ushort double2half_rtp( double f );
+static cl_ushort double2half_rtn( double f );
+static cl_ushort
+float2half_rte( float f )
-    return cl_half_from_float(f, CL_HALF_RTE);
+    union{ float f; cl_uint u; } u = {f};
+    cl_uint sign = (u.u >> 16) & 0x8000;
+    float x = fabsf(f);
+    //Nan
+    if( x != x )
+    {
+        u.u >>= (24-11);
+        u.u &= 0x7fff;
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+    // overflow
+    if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) )
+        return 0x7c00 | sign;
+    // underflow
+    if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) )
+        return sign;    // The halfway case can return 0x0001 or 0. 0 is even.
+    // very small
+    if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
+        return sign | 1;
+    // half denormal
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125);
+        return sign | u.u;
+    }
+    u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13);
+    u.u &= 0x7f800000;
+    x += u.f;
+    u.f = x - u.f;
+    u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);
+    return (u.u >> (24-11)) | sign;
-static cl_half float2half_rtz(float f)
+static cl_ushort
+float2half_rtz( float f )
-    return cl_half_from_float(f, CL_HALF_RTZ);
+    union{ float f; cl_uint u; } u = {f};
+    cl_uint sign = (u.u >> 16) & 0x8000;
+    float x = fabsf(f);
+    //Nan
+    if( x != x )
+    {
+        u.u >>= (24-11);
+        u.u &= 0x7fff;
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+    // overflow
+    if( x >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) )
+    {
+        if( x == INFINITY )
+            return 0x7c00 | sign;
+        return 0x7bff | sign;
+    }
+    // underflow
+    if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
+        return sign;    // The halfway case can return 0x0001 or 0. 0 is even.
+    // half denormal
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
+        return (cl_ushort)((int) x | sign);
+    }
+    u.u &= 0xFFFFE000U;
+    u.u -= 0x38000000U;
+    return (u.u >> (24-11)) | sign;
-static cl_half float2half_rtp(float f)
+static cl_ushort
+float2half_rtp( float f )
-    return cl_half_from_float(f, CL_HALF_RTP);
+    union{ float f; cl_uint u; } u = {f};
+    cl_uint sign = (u.u >> 16) & 0x8000;
+    float x = fabsf(f);
+    //Nan
+    if( x != x )
+    {
+        u.u >>= (24-11);
+        u.u &= 0x7fff;
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+    // overflow
+    if( f > MAKE_HEX_FLOAT(0x1.ffcp15f, 0x1ffcL, 3) )
+        return 0x7c00;
+    if( f <= MAKE_HEX_FLOAT(-0x1.0p16f, -0x1L, 16) )
+    {
+        if( f == -INFINITY )
+            return 0xfc00;
+        return 0xfbff;
+    }
+    // underflow
+    if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
+    {
+        if( f > 0 )
+            return 1;
+        return sign;
+    }
+    // half denormal
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
+        int r = (int) x;
+        r += (float) r != x && f > 0.0f;
+        return (cl_ushort)( r | sign);
+    }
+    float g = u.f;
+    u.u &= 0xFFFFE000U;
+    if( g > u.f )
+        u.u += 0x00002000U;
+    u.u -= 0x38000000U;
+    return (u.u >> (24-11)) | sign;
-static cl_half float2half_rtn(float f)
+static cl_ushort
+float2half_rtn( float f )
-    return cl_half_from_float(f, CL_HALF_RTN);
+    union{ float f; cl_uint u; } u = {f};
+    cl_uint sign = (u.u >> 16) & 0x8000;
+    float x = fabsf(f);
+    //Nan
+    if( x != x )
+    {
+        u.u >>= (24-11);
+        u.u &= 0x7fff;
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+    // overflow
+    if( f >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) )
+    {
+        if( f == INFINITY )
+            return 0x7c00;
+        return 0x7bff;
+    }
+    if( f < MAKE_HEX_FLOAT(-0x1.ffcp15f, -0x1ffcL, 3) )
+        return 0xfc00;
+    // underflow
+    if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
+    {
+        if( f < 0 )
+            return 0x8001;
+        return sign;
+    }
+    // half denormal
+    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+    {
+        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
+        int r = (int) x;
+        r += (float) r != x && f < 0.0f;
+        return (cl_ushort)( r | sign);
+    }
+    u.u &= 0xFFFFE000U;
+    if( u.f > f )
+        u.u += 0x00002000U;
+    u.u -= 0x38000000U;
+    return (u.u >> (24-11)) | sign;
-static cl_half double2half_rte(double f)
+static cl_ushort
+double2half_rte( double f )
-    return cl_half_from_double(f, CL_HALF_RTE);
+    union{ double f; cl_ulong u; } u = {f};
+    cl_ulong sign = (u.u >> 48) & 0x8000;
+    double x = fabs(f);
+    //Nan
+    if( x != x )
+    {
+        u.u >>= (53-11);
+        u.u &= 0x7fff;
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+    // overflow
+    if( x >= MAKE_HEX_DOUBLE(0x1.ffep15, 0x1ffeLL, 3) )
+        return 0x7c00 | sign;
+    // underflow
+    if( x <= MAKE_HEX_DOUBLE(0x1.0p-25, 0x1LL, -25) )
+        return sign;    // The halfway case can return 0x0001 or 0. 0 is even.
+    // very small
+    if( x < MAKE_HEX_DOUBLE(0x1.8p-24, 0x18LL, -28) )
+        return sign | 1;
+    // half denormal
+    if( x < MAKE_HEX_DOUBLE(0x1.0p-14, 0x1LL, -14) )
+    {
+        u.f = x * MAKE_HEX_DOUBLE(0x1.0p-1050, 0x1LL, -1050);
+        return sign | u.u;
+    }
+    u.f *= MAKE_HEX_DOUBLE(0x1.0p42, 0x1LL, 42);
+    u.u &= 0x7ff0000000000000ULL;
+    x += u.f;
+    u.f = x - u.f;
+    u.f *= MAKE_HEX_DOUBLE(0x1.0p-1008, 0x1LL, -1008);
+    return (u.u >> (53-11)) | sign;
-static cl_half double2half_rtz(double f)
+static cl_ushort
+double2half_rtz( double f )
-    return cl_half_from_double(f, CL_HALF_RTZ);
+    union{ double f; cl_ulong u; } u = {f};
+    cl_ulong sign = (u.u >> 48) & 0x8000;
+    double x = fabs(f);
+    //Nan
+    if( x != x )
+    {
+        u.u >>= (53-11);
+        u.u &= 0x7fff;
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+    if( x == INFINITY )
+        return 0x7c00 | sign;
+    // overflow
+    if( x >= MAKE_HEX_DOUBLE(0x1.0p16, 0x1LL, 16) )
+        return 0x7bff | sign;
+    // underflow
+    if( x < MAKE_HEX_DOUBLE(0x1.0p-24, 0x1LL, -24) )
+        return sign;    // The halfway case can return 0x0001 or 0. 0 is even.
+    // half denormal
+    if( x < MAKE_HEX_DOUBLE(0x1.0p-14, 0x1LL, -14) )
+    {
+        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
+        return (cl_ushort)((int) x | sign);
+    }
+    u.u &= 0xFFFFFC0000000000ULL;
+    u.u -= 0x3F00000000000000ULL;
+    return (u.u >> (53-11)) | sign;
-static cl_half double2half_rtp(double f)
+static cl_ushort
+double2half_rtp( double f )
-    return cl_half_from_double(f, CL_HALF_RTP);
+    union{ double f; cl_ulong u; } u = {f};
+    cl_ulong sign = (u.u >> 48) & 0x8000;
+    double x = fabs(f);
+    //Nan
+    if( x != x )
+    {
+        u.u >>= (53-11);
+        u.u &= 0x7fff;
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+    // overflow
+    if( f > MAKE_HEX_DOUBLE(0x1.ffcp15, 0x1ffcLL, 3) )
+        return 0x7c00;
+    if( f <= MAKE_HEX_DOUBLE(-0x1.0p16, -0x1LL, 16) )
+    {
+        if( f == -INFINITY )
+            return 0xfc00;
+        return 0xfbff;
+    }
+    // underflow
+    if( x < MAKE_HEX_DOUBLE(0x1.0p-24, 0x1LL, -24) )
+    {
+        if( f > 0 )
+            return 1;
+        return sign;
+    }
+    // half denormal
+    if( x < MAKE_HEX_DOUBLE(0x1.0p-14, 0x1LL, -14) )
+    {
+        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
+        int r = (int) x;
+        if( 0 == sign )
+            r += (double) r != x;
+        return (cl_ushort)( r | sign);
+    }
+    double g = u.f;
+    u.u &= 0xFFFFFC0000000000ULL;
+    if( g != u.f && 0 == sign)
+        u.u += 0x0000040000000000ULL;
+    u.u -= 0x3F00000000000000ULL;
+    return (u.u >> (53-11)) | sign;
-static cl_half double2half_rtn(double f)
+static cl_ushort
+double2half_rtn( double f )
-    return cl_half_from_double(f, CL_HALF_RTN);
+    union{ double f; cl_ulong u; } u = {f};
+    cl_ulong sign = (u.u >> 48) & 0x8000;
+    double x = fabs(f);
+    //Nan
+    if( x != x )
+    {
+        u.u >>= (53-11);
+        u.u &= 0x7fff;
+        u.u |= 0x0200;      //silence the NaN
+        return u.u | sign;
+    }
+    // overflow
+    if( f >= MAKE_HEX_DOUBLE(0x1.0p16, 0x1LL, 16) )
+    {
+        if( f == INFINITY )
+            return 0x7c00;
+        return 0x7bff;
+    }
+    if( f < MAKE_HEX_DOUBLE(-0x1.ffcp15, -0x1ffcLL, 3) )
+        return 0xfc00;
+    // underflow
+    if( x < MAKE_HEX_DOUBLE(0x1.0p-24, 0x1LL, -24) )
+    {
+        if( f < 0 )
+            return 0x8001;
+        return sign;
+    }
+    // half denormal
+    if( x < MAKE_HEX_DOUBLE(0x1.0p-14, 0x1LL, -14) )
+    {
+        x *= MAKE_HEX_DOUBLE(0x1.0p24, 0x1LL, 24);
+        int r = (int) x;
+        if( sign )
+            r += (double) r != x;
+        return (cl_ushort)( r | sign);
+    }
+    double g = u.f;
+    u.u &= 0xFFFFFC0000000000ULL;
+    if( g < u.f && sign)
+        u.u += 0x0000040000000000ULL;
+    u.u -= 0x3F00000000000000ULL;
+    return (u.u >> (53-11)) | sign;
 int test_vstore_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
@@ -696,30 +1056,30 @@
     ComputeReferenceInfoF fref;
     fref.x = (float *)gIn_single;
-    fref.r = (cl_half *)gOut_half_reference;
+    fref.r = (cl_ushort *)gOut_half_reference;
     fref.f = referenceFunc;
     fref.lim = blockCount;
     fref.count = (blockCount + threadCount - 1) / threadCount;
     CheckResultInfoF fchk;
     fchk.x = (const float *)gIn_single;
-    fchk.r = (const cl_half *)gOut_half_reference;
-    fchk.s = (const cl_half *)gOut_half;
+    fchk.r = (const cl_ushort *)gOut_half_reference;
+    fchk.s = (const cl_ushort *)gOut_half;
     fchk.f = referenceFunc;
     fchk.lim = blockCount;
     fchk.count = (blockCount + threadCount - 1) / threadCount;
     ComputeReferenceInfoD dref;
     dref.x = (double *)gIn_double;
-    dref.r = (cl_half *)gOut_half_reference_double;
+    dref.r = (cl_ushort *)gOut_half_reference_double;
     dref.f = doubleReferenceFunc;
     dref.lim = blockCount;
     dref.count = (blockCount + threadCount - 1) / threadCount;
     CheckResultInfoD dchk;
     dchk.x = (const double *)gIn_double;
-    dchk.r = (const cl_half *)gOut_half_reference_double;
-    dchk.s = (const cl_half *)gOut_half;
+    dchk.r = (const cl_ushort *)gOut_half_reference_double;
+    dchk.s = (const cl_ushort *)gOut_half;
     dchk.f = doubleReferenceFunc;
     dchk.lim = blockCount;
     dchk.count = (blockCount + threadCount - 1) / threadCount;
@@ -764,9 +1124,7 @@
                 cl_uint pattern = 0xdeaddead;
                 memset_pattern4( gOut_half, &pattern, BUFFER_SIZE/2);
-                error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE,
-                                             0, count * sizeof(cl_half),
-                                             gOut_half, 0, NULL, NULL);
+                error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                 if (error) {
                     vlog_error( "Failure in clWriteArray\n" );
@@ -781,9 +1139,7 @@
                     goto exit;
-                error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0,
-                                            count * sizeof(cl_half), gOut_half,
-                                            0, NULL, NULL);
+                error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                 if (error) {
                     vlog_error( "Failure in clReadArray\n" );
@@ -799,9 +1155,7 @@
                 if (gTestDouble) {
                     memset_pattern4( gOut_half, &pattern, BUFFER_SIZE/2);
-                    error = clEnqueueWriteBuffer(
-                        gQueue, gOutBuffer_half, CL_FALSE, 0,
-                        count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
+                    error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                     if (error) {
                         vlog_error( "Failure in clWriteArray\n" );
@@ -816,9 +1170,7 @@
                         goto exit;
-                    error = clEnqueueReadBuffer(
-                        gQueue, gOutBuffer_half, CL_TRUE, 0,
-                        count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
+                    error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                     if (error) {
                         vlog_error( "Failure in clReadArray\n" );
@@ -1293,30 +1645,30 @@
     ComputeReferenceInfoF fref;
     fref.x = (float *)gIn_single;
-    fref.r = (cl_half *)gOut_half_reference;
+    fref.r = (cl_ushort *)gOut_half_reference;
     fref.f = referenceFunc;
     fref.lim = blockCount;
     fref.count = (blockCount + threadCount - 1) / threadCount;
     CheckResultInfoF fchk;
     fchk.x = (const float *)gIn_single;
-    fchk.r = (const cl_half *)gOut_half_reference;
-    fchk.s = (const cl_half *)gOut_half;
+    fchk.r = (const cl_ushort *)gOut_half_reference;
+    fchk.s = (const cl_ushort *)gOut_half;
     fchk.f = referenceFunc;
     fchk.lim = blockCount;
     fchk.count = (blockCount + threadCount - 1) / threadCount;
     ComputeReferenceInfoD dref;
     dref.x = (double *)gIn_double;
-    dref.r = (cl_half *)gOut_half_reference_double;
+    dref.r = (cl_ushort *)gOut_half_reference_double;
     dref.f = doubleReferenceFunc;
     dref.lim = blockCount;
     dref.count = (blockCount + threadCount - 1) / threadCount;
     CheckResultInfoD dchk;
     dchk.x = (const double *)gIn_double;
-    dchk.r = (const cl_half *)gOut_half_reference_double;
-    dchk.s = (const cl_half *)gOut_half;
+    dchk.r = (const cl_ushort *)gOut_half_reference_double;
+    dchk.s = (const cl_ushort *)gOut_half;
     dchk.f = doubleReferenceFunc;
     dchk.lim = blockCount;
     dchk.count = (blockCount + threadCount - 1) / threadCount;
@@ -1361,9 +1713,7 @@
                 cl_uint pattern = 0xdeaddead;
                 memset_pattern4(gOut_half, &pattern, BUFFER_SIZE/2);
-                error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE,
-                                             0, count * sizeof(cl_half),
-                                             gOut_half, 0, NULL, NULL);
+                error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                 if (error) {
                     vlog_error( "Failure in clWriteArray\n" );
@@ -1378,9 +1728,7 @@
                     goto exit;
-                error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0,
-                                            count * sizeof(cl_half), gOut_half,
-                                            0, NULL, NULL);
+                error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                 if (error) {
                     vlog_error( "Failure in clReadArray\n" );
@@ -1396,9 +1744,7 @@
                 if (gTestDouble) {
                     memset_pattern4(gOut_half, &pattern, BUFFER_SIZE/2);
-                    error = clEnqueueWriteBuffer(
-                        gQueue, gOutBuffer_half, CL_FALSE, 0,
-                        count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
+                    error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                     if (error) {
                         vlog_error( "Failure in clWriteArray\n" );
@@ -1413,9 +1759,7 @@
                         goto exit;
-                    error = clEnqueueReadBuffer(
-                        gQueue, gOutBuffer_half, CL_TRUE, 0,
-                        count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
+                    error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL);
                     if (error) {
                         vlog_error( "Failure in clReadArray\n" );
diff --git a/test_conformance/half/cl_utils.h b/test_conformance/half/cl_utils.h
index 50d8af3..82a6311 100644
--- a/test_conformance/half/cl_utils.h
+++ b/test_conformance/half/cl_utils.h
@@ -18,7 +18,6 @@
 #include "harness/testHarness.h"
 #include "harness/compat.h"
-#include "harness/conversions.h"
 #include <stdio.h>
@@ -110,6 +109,43 @@
     return u;
+static inline int IsHalfSubnormal( uint16_t x )
+    // this relies on unsigned integer wraparound to exclude 0 as a subnormal
+    return ( ( x & 0x7fffU ) - 1U ) < 0x03ffU;
+// prevent silent failures due to missing FLT_RADIX
+#ifndef FLT_RADIX
+    #error FLT_RADIX is not defined by float.h
+static inline int IsFloatSubnormal( double x )
+#if 2 == FLT_RADIX
+    // Do this in integer to avoid problems with FTZ behavior
+    union{ float d; uint32_t u;}u;
+    u.d = fabsf((float) x);
+    return (u.u-1) < 0x007fffffU;
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return fabs(x) < (double) FLT_MIN && x != 0.0;
+static inline int IsDoubleSubnormal( long double x )
+#if 2 == FLT_RADIX
+    // Do this in integer to avoid problems with FTZ behavior
+    union{ double d; uint64_t u;}u;
+    u.d = fabs((double)x);
+    return (u.u-1) < 0x000fffffffffffffULL;
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return fabs(x) < (double) DBL_MIN && x != 0.0;
 #endif /* CL_UTILS_H */
diff --git a/test_conformance/half/main.cpp b/test_conformance/half/main.cpp
index 6600cc5..1af138c 100644
--- a/test_conformance/half/main.cpp
+++ b/test_conformance/half/main.cpp
@@ -19,7 +19,7 @@
 #if !defined (_WIN32)
 #include <sys/resource.h>
-#if defined(__APPLE__)
+#if !defined(__ANDROID__)
 #include <sys/sysctl.h>
 #include <libgen.h>
diff --git a/test_conformance/images/clCopyImage/main.cpp b/test_conformance/images/clCopyImage/main.cpp
index c2cad01..04a8f51 100644
--- a/test_conformance/images/clCopyImage/main.cpp
+++ b/test_conformance/images/clCopyImage/main.cpp
@@ -13,16 +13,23 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "../harness/compat.h"
 #include <stdio.h>
 #include <string.h>
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <sys/time.h>
 #include "../testBase.h"
-#include "../harness/compat.h"
 #include "../harness/testHarness.h"
 bool gDebugTrace;
 bool gTestSmallImages;
 bool gTestMaxImages;
+bool gUseRamp;
 bool gEnablePitch;
 bool gTestMipmaps;
 int gTypesToTest;
@@ -31,6 +38,8 @@
 extern int test_image_set( cl_device_id device, cl_context context, cl_command_queue queue, MethodsToTest testMethod );
 static void printUsage( const char *execName );
 int test_1D(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
@@ -126,6 +135,8 @@
             gTestSmallImages = true;
         else if( strcmp( argv[i], "max_images" ) == 0 )
             gTestMaxImages = true;
+        else if( strcmp( argv[i], "use_ramps" ) == 0 )
+            gUseRamp = true;
         else if( strcmp( argv[i], "use_pitches" ) == 0 )
             gEnablePitch = true;
@@ -151,8 +162,7 @@
     if( gTestSmallImages )
         log_info( "Note: Using small test images\n" );
-    int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
-                                      false, 0, verifyImageSupport);
+    int ret = runTestHarness( argCount, argList, test_num, test_list, true, false, 0 );
     return ret;
@@ -172,6 +182,7 @@
     log_info( "\tmax_images - Runs every format through a set of size combinations with the max values, max values - 1, and max values / 128\n" );
     log_info( "\trandomize - Use random seed\n" );
     log_info( "\tuse_pitches - Enables row and slice pitches\n" );
+    log_info( "\tuse_ramps - Instead of random data, uses images filled with ramps (and 0xff on any padding pixels) to ease debugging\n" );
     log_info( "\n" );
     log_info( "Test names:\n" );
     for( int i = 0; i < test_num; i++ )
diff --git a/test_conformance/images/clCopyImage/test_copy_1D.cpp b/test_conformance/images/clCopyImage/test_copy_1D.cpp
index 2c996c7..ab22320 100644
--- a/test_conformance/images/clCopyImage/test_copy_1D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D.cpp
@@ -15,6 +15,14 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );
diff --git a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
index 0b61693..62eed3f 100644
--- a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
@@ -15,6 +15,14 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );
diff --git a/test_conformance/images/clCopyImage/test_copy_2D.cpp b/test_conformance/images/clCopyImage/test_copy_2D.cpp
index 1a69a1f..7af2fe3 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D.cpp
@@ -15,6 +15,14 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
index eb6dd55..f784230 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
@@ -16,6 +16,14 @@
 #include "../testBase.h"
 #include "../common.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
index 8a56c95..d341455 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
@@ -16,6 +16,14 @@
 #include "../testBase.h"
 #include "../common.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
index 6327ba5..5624245 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
@@ -15,6 +15,13 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
 // Defined in test_copy_generic.cpp
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );
diff --git a/test_conformance/images/clCopyImage/test_copy_3D.cpp b/test_conformance/images/clCopyImage/test_copy_3D.cpp
index da6731d..fb17623 100644
--- a/test_conformance/images/clCopyImage/test_copy_3D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_3D.cpp
@@ -15,6 +15,13 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
 // Defined in test_copy_generic.cpp
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );
diff --git a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
index c098f64..b3b3223 100644
--- a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
@@ -16,6 +16,14 @@
 #include "../testBase.h"
 #include "../common.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
 extern int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                                    const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d );
diff --git a/test_conformance/images/clCopyImage/test_copy_generic.cpp b/test_conformance/images/clCopyImage/test_copy_generic.cpp
index 026916e..5a8f3d3 100644
--- a/test_conformance/images/clCopyImage/test_copy_generic.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_generic.cpp
@@ -15,6 +15,14 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
 static void CL_CALLBACK free_pitch_buffer( cl_mem image, void *buf )
     free( buf );
@@ -105,41 +113,23 @@
     if ( *error != CL_SUCCESS )
-        long long unsigned imageSize = get_image_size_mb(imageInfo);
         switch (imageInfo->type)
             case CL_MEM_OBJECT_IMAGE1D:
-                log_error("ERROR: Unable to create 1D image of size %d (%llu "
-                          "MB):(%s)",
-                          (int)imageInfo->width, imageSize,
-                          IGetErrorString(*error));
+                log_error( "ERROR: Unable to create 1D image of size %d (%s)", (int)imageInfo->width, IGetErrorString( *error ) );
             case CL_MEM_OBJECT_IMAGE2D:
-                log_error("ERROR: Unable to create 2D image of size %d x %d "
-                          "(%llu MB):(%s)",
-                          (int)imageInfo->width, (int)imageInfo->height,
-                          imageSize, IGetErrorString(*error));
+                log_error( "ERROR: Unable to create 2D image of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, IGetErrorString( *error ) );
             case CL_MEM_OBJECT_IMAGE3D:
-                log_error("ERROR: Unable to create 3D image of size %d x %d x "
-                          "%d (%llu MB):(%s)",
-                          (int)imageInfo->width, (int)imageInfo->height,
-                          (int)imageInfo->depth, imageSize,
-                          IGetErrorString(*error));
+                log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, IGetErrorString( *error ) );
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                log_error("ERROR: Unable to create 1D image array of size %d x "
-                          "%d (%llu MB):(%s)",
-                          (int)imageInfo->width, (int)imageInfo->arraySize,
-                          imageSize, IGetErrorString(*error));
+                log_error( "ERROR: Unable to create 1D image array of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->arraySize, IGetErrorString( *error ) );
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                log_error("ERROR: Unable to create 2D image array of size %d x "
-                          "%d x %d (%llu MB):(%s)",
-                          (int)imageInfo->width, (int)imageInfo->height,
-                          (int)imageInfo->arraySize, imageSize,
-                          IGetErrorString(*error));
+                log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, IGetErrorString( *error ) );
         log_error("ERROR: and %llu mip levels\n", (unsigned long long) imageInfo->num_mip_levels);
@@ -284,6 +274,7 @@
     return img;
 // WARNING -- not thread safe
 BufferOwningPtr<char> srcData;
 BufferOwningPtr<char> dstData;
@@ -308,7 +299,24 @@
-        srcBytes = get_image_size(srcImageInfo);
+        switch (srcImageInfo->type)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                srcBytes = srcImageInfo->rowPitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                srcBytes = srcImageInfo->height * srcImageInfo->rowPitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                srcBytes = srcImageInfo->depth * srcImageInfo->slicePitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                srcBytes = srcImageInfo->arraySize * srcImageInfo->slicePitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                srcBytes = srcImageInfo->arraySize * srcImageInfo->slicePitch;
+                break;
+        }
     if (srcBytes > srcData.getSize())
@@ -344,7 +352,24 @@
-        destImageSize = get_image_size(dstImageInfo);
+        switch (dstImageInfo->type)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                destImageSize = dstImageInfo->rowPitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                destImageSize = dstImageInfo->height * dstImageInfo->rowPitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                destImageSize = dstImageInfo->depth * dstImageInfo->slicePitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                destImageSize = dstImageInfo->arraySize * dstImageInfo->slicePitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                destImageSize = dstImageInfo->arraySize * dstImageInfo->slicePitch;
+                break;
+        }
     if (destImageSize > dstData.getSize())
@@ -356,11 +381,7 @@
             log_error( "ERROR: Unable to malloc %lu bytes for dstData\n", destImageSize );
             return -1;
-    }
-    if (destImageSize > dstHost.getSize())
-    {
-        dstHost.reset(NULL);
         if (dstHost == NULL) {
@@ -547,17 +568,58 @@
             if( memcmp( sourcePtr, destPtr, scanlineSize ) != 0 )
+                log_error( "ERROR: Scanline %d did not verify for image size %d,%d,%d pitch %d (extra %d bytes)\n", (int)y, (int)dstImageInfo->width, (int)dstImageInfo->height, (int)dstImageInfo->depth, (int)dstImageInfo->rowPitch, (int)dstImageInfo->rowPitch - (int)dstImageInfo->width * (int)get_pixel_size( dstImageInfo->format ) );
                 // Find the first missing pixel
                 size_t pixel_size = get_pixel_size( dstImageInfo->format );
                 size_t where = 0;
                 for( where = 0; where < dstImageInfo->width; where++ )
                     if( memcmp( sourcePtr + pixel_size * where, destPtr + pixel_size * where, pixel_size) )
+                log_error( "Failed at column: %ld   ", where );
+                switch( pixel_size )
+                {
+                    case 1:
+                        log_error( "*0x%2.2x vs. 0x%2.2x\n", ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[0] );
+                        break;
+                    case 2:
+                        log_error( "*0x%4.4x vs. 0x%4.4x\n", ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[0] );
+                        break;
+                    case 3:
+                        log_error( "*{0x%2.2x, 0x%2.2x, 0x%2.2x} vs. {0x%2.2x, 0x%2.2x, 0x%2.2x}\n",
+                                  ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(sourcePtr + pixel_size * where))[1], ((cl_uchar*)(sourcePtr + pixel_size * where))[2],
+                                  ((cl_uchar*)(destPtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[1], ((cl_uchar*)(destPtr + pixel_size * where))[2]
+                                  );
+                        break;
+                    case 4:
+                        log_error( "*0x%8.8x vs. 0x%8.8x\n", ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[0] );
+                        break;
+                    case 6:
+                        log_error( "*{0x%4.4x, 0x%4.4x, 0x%4.4x} vs. {0x%4.4x, 0x%4.4x, 0x%4.4x}\n",
+                                  ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(sourcePtr + pixel_size * where))[1], ((cl_ushort*)(sourcePtr + pixel_size * where))[2],
+                                  ((cl_ushort*)(destPtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[1], ((cl_ushort*)(destPtr + pixel_size * where))[2]
+                                  );
+                        break;
+                    case 8:
+                        log_error( "*0x%16.16llx vs. 0x%16.16llx\n", ((cl_ulong*)(sourcePtr + pixel_size * where))[0], ((cl_ulong*)(destPtr + pixel_size * where))[0] );
+                        break;
+                    case 12:
+                        log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x} vs. {0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
+                                  ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2],
+                                  ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2]
+                                  );
+                        break;
+                    case 16:
+                        log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x} vs. {0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
+                                  ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2], ((cl_uint*)(sourcePtr + pixel_size * where))[3],
+                                  ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2], ((cl_uint*)(destPtr + pixel_size * where))[3]
+                                  );
+                        break;
+                    default:
+                        log_error( "Don't know how to print pixel size of %ld\n", pixel_size );
+                        break;
+                }
-                print_first_pixel_difference_error(
-                    where, sourcePtr + pixel_size * where,
-                    destPtr + pixel_size * where, dstImageInfo, y,
-                    dstImageInfo->depth);
                 return -1;
             sourcePtr += rowPitch;
@@ -578,14 +640,113 @@
         return error;
-    // Ensure the unmap call completes.
-    error = clFinish(queue);
-    if (error != CL_SUCCESS)
-    {
-        log_error("ERROR: clFinish() failed to return CL_SUCCESS: %s\n",
-                  IGetErrorString(error));
-        return error;
-    }
     return 0;
+int test_copy_image_size_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, MTdata d )
+    size_t sourcePos[ 3 ], destPos[ 3 ], regionSize[ 3 ];
+    int ret = 0, retCode;
+    for (int i = 0; i < 8; i++)
+    {
+        switch (srcImageInfo->type)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                sourcePos[ 0 ] = random_in_range( 0, (int)(srcImageInfo->width - 4), d );
+                sourcePos[ 1 ] = 1;
+                sourcePos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                sourcePos[ 0 ] = random_in_range( 0, (int)(srcImageInfo->width - 4), d );
+                sourcePos[ 1 ] = random_in_range( 0, (int)(srcImageInfo->height - 4), d );
+                sourcePos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                sourcePos[ 0 ] = random_in_range( 0, (int)(srcImageInfo->width - 4), d );
+                sourcePos[ 1 ] = random_in_range( 0, (int)(srcImageInfo->height - 4), d );
+                sourcePos[ 2 ] = random_in_range( 0, (int)(srcImageInfo->depth - 4), d );
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                sourcePos[ 0 ] = random_in_range( 0, (int)(srcImageInfo->width - 4), d );
+                sourcePos[ 1 ] = random_in_range( 0, (int)(srcImageInfo->arraySize - 4), d );
+                sourcePos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                sourcePos[ 0 ] = random_in_range( 0, (int)(srcImageInfo->width - 4), d );
+                sourcePos[ 1 ] = random_in_range( 0, (int)(srcImageInfo->height - 4), d );
+                sourcePos[ 2 ] = random_in_range( 0, (int)(srcImageInfo->arraySize - 4), d );
+                break;
+        }
+        switch (dstImageInfo->type)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                destPos[ 0 ] = random_in_range( 0, (int)(dstImageInfo->width - 4), d );
+                destPos[ 1 ] = 1;
+                destPos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                destPos[ 0 ] = random_in_range( 0, (int)(dstImageInfo->width - 4), d );
+                destPos[ 1 ] = random_in_range( 0, (int)(dstImageInfo->height - 4), d );
+                destPos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                destPos[ 0 ] = random_in_range( 0, (int)(dstImageInfo->width - 4), d );
+                destPos[ 1 ] = random_in_range( 0, (int)(dstImageInfo->height - 4), d );
+                destPos[ 2 ] = random_in_range( 0, (int)(dstImageInfo->depth - 4), d );
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                destPos[ 0 ] = random_in_range( 0, (int)(dstImageInfo->width - 4), d );
+                destPos[ 1 ] = random_in_range( 0, (int)(dstImageInfo->arraySize - 4), d );
+                destPos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                destPos[ 0 ] = random_in_range( 0, (int)(dstImageInfo->width - 4), d );
+                destPos[ 1 ] = random_in_range( 0, (int)(dstImageInfo->height - 4), d );
+                destPos[ 2 ] = random_in_range( 0, (int)(dstImageInfo->arraySize - 4), d );
+                break;
+        }
+        if ( (dstImageInfo->width - destPos[0]) < (srcImageInfo->width - sourcePos[0]) )
+            regionSize[0] = random_in_range(1, (dstImageInfo->width - destPos[0]), d);
+        else
+            regionSize[0] = random_in_range(1, (srcImageInfo->width - sourcePos[0]), d);
+        if (srcImageInfo->type == CL_MEM_OBJECT_IMAGE1D || dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D)
+            regionSize[1] = 0;
+        else
+        {
+            if ( (dstImageInfo->height - destPos[1]) < (srcImageInfo->height - sourcePos[1]) )
+                regionSize[1] = random_in_range(1, (dstImageInfo->height - destPos[1]), d);
+            else
+                regionSize[1] = random_in_range(1, (srcImageInfo->height - sourcePos[1]), d);
+        }
+        regionSize[2] = 0;
+        if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D && srcImageInfo->type == CL_MEM_OBJECT_IMAGE3D)
+        {
+            if ( (dstImageInfo->depth - destPos[2]) < (srcImageInfo->depth - sourcePos[2]) )
+                regionSize[2] = random_in_range(1, (dstImageInfo->depth - destPos[2]), d);
+            else
+                regionSize[2] = random_in_range(1, (srcImageInfo->depth - sourcePos[2]), d);
+        }
+        else if ( (dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY && srcImageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY) )
+        {
+            if ( (dstImageInfo->arraySize - destPos[2]) < (srcImageInfo->arraySize - sourcePos[2]) )
+                regionSize[2] = random_in_range(1, (dstImageInfo->arraySize - destPos[2]), d);
+            else
+                regionSize[2] = random_in_range(1, (srcImageInfo->arraySize - sourcePos[2]), d);
+        }
+        // Go for it!
+        retCode = test_copy_image_generic( context, queue, srcImageInfo, dstImageInfo, sourcePos, destPos, regionSize, d );
+        if( retCode < 0 )
+            return retCode;
+        else
+            ret += retCode;
+    }
+    return ret;
diff --git a/test_conformance/images/clCopyImage/test_loops.cpp b/test_conformance/images/clCopyImage/test_loops.cpp
index 6ee1e53..1cb79e9 100644
--- a/test_conformance/images/clCopyImage/test_loops.cpp
+++ b/test_conformance/images/clCopyImage/test_loops.cpp
@@ -16,6 +16,18 @@
 #include "../testBase.h"
 #include "../common.h"
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern int                gTypesToTest;
+extern int                gNormalizedModeToUse;
+extern bool               gTestMipmaps;
+extern cl_channel_type      gChannelTypeToUse;
+extern cl_channel_type      gChannelTypeToUse;
+extern cl_channel_order      gChannelOrderToUse;
+extern bool gDebugTrace;
 extern int test_copy_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
 extern int test_copy_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
 extern int test_copy_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
@@ -105,14 +117,25 @@
     int ret = 0;
     // Grab the list of supported image formats for integer reads
-    std::vector<cl_image_format> formatList;
-    if (get_format_list(context, imageType, formatList, flags)) return -1;
+    cl_image_format *formatList;
+    bool *filterFlags;
+    unsigned int numFormats;
-    std::vector<bool> filterFlags(formatList.size(), false);
-    filter_formats(formatList, filterFlags, nullptr);
+    if( get_format_list( context, imageType, formatList, numFormats, flags ) )
+        return -1;
+    filterFlags = new bool[ numFormats ];
+    if( filterFlags == NULL )
+    {
+        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
+        return -1;
+    }
+    memset( filterFlags, 0, sizeof( bool ) * numFormats );
+    filter_formats(formatList, filterFlags, numFormats, NULL);
     // Run the format list
-    for (unsigned int i = 0; i < formatList.size(); i++)
+    for( unsigned int i = 0; i < numFormats; i++ )
         int test_return = 0;
         if( filterFlags[i] )
@@ -157,6 +180,9 @@
         ret += test_return;
+    delete filterFlags;
+    delete formatList;
     return ret;
diff --git a/test_conformance/images/clFillImage/main.cpp b/test_conformance/images/clFillImage/main.cpp
index b19d85a..23d9e4c 100644
--- a/test_conformance/images/clFillImage/main.cpp
+++ b/test_conformance/images/clFillImage/main.cpp
@@ -13,11 +13,17 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "../harness/compat.h"
 #include <stdio.h>
 #include <string.h>
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <sys/time.h>
 #include "../testBase.h"
-#include "../harness/compat.h"
 #include "../harness/testHarness.h"
 bool gDebugTrace;
@@ -31,6 +37,7 @@
 extern int test_image_set( cl_device_id device, cl_context context, cl_command_queue queue, MethodsToTest testMethod );
 static void printUsage( const char *execName );
 int test_1D(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
@@ -124,8 +131,7 @@
     if ( gTestSmallImages )
         log_info( "Note: Using small test images\n" );
-    int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
-                                      false, 0, verifyImageSupport);
+    int ret = runTestHarness( argCount, argList, test_num, test_list, true, false, 0 );
     return ret;
diff --git a/test_conformance/images/clFillImage/test_fill_1D.cpp b/test_conformance/images/clFillImage/test_fill_1D.cpp
index c3f2318..51eb822 100644
--- a/test_conformance/images/clFillImage/test_fill_1D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_1D.cpp
@@ -15,6 +15,14 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool               gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages;
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern uint64_t           gRoundingStartValue;
 // Defined in test_fill_2D_3D.cpp
 extern int test_fill_image_generic( cl_context context, cl_command_queue queue, image_descriptor *imageInfo,
                                     const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d );
diff --git a/test_conformance/images/clFillImage/test_fill_1D_array.cpp b/test_conformance/images/clFillImage/test_fill_1D_array.cpp
index b4347a4..edbcacd 100644
--- a/test_conformance/images/clFillImage/test_fill_1D_array.cpp
+++ b/test_conformance/images/clFillImage/test_fill_1D_array.cpp
@@ -15,6 +15,14 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool               gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages;
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern uint64_t           gRoundingStartValue;
 // Defined in test_fill_2D_3D.cpp
 extern int test_fill_image_generic( cl_context context, cl_command_queue queue, image_descriptor *imageInfo,
                                     const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d );
diff --git a/test_conformance/images/clFillImage/test_fill_2D.cpp b/test_conformance/images/clFillImage/test_fill_2D.cpp
index bb66fc2..8e76e86 100644
--- a/test_conformance/images/clFillImage/test_fill_2D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_2D.cpp
@@ -15,6 +15,14 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool               gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages;
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern uint64_t           gRoundingStartValue;
 // Defined in test_fill_2D_3D.cpp
 extern int test_fill_image_generic( cl_context context, cl_command_queue queue, image_descriptor *imageInfo,
                                     const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d );
diff --git a/test_conformance/images/clFillImage/test_fill_2D_array.cpp b/test_conformance/images/clFillImage/test_fill_2D_array.cpp
index 3265aab..260b869 100644
--- a/test_conformance/images/clFillImage/test_fill_2D_array.cpp
+++ b/test_conformance/images/clFillImage/test_fill_2D_array.cpp
@@ -15,6 +15,13 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool               gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch;
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
 // Defined in test_fill_2D_3D.cpp
 extern int test_fill_image_generic( cl_context context, cl_command_queue queue, image_descriptor *imageInfo,
                                    const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d );
diff --git a/test_conformance/images/clFillImage/test_fill_3D.cpp b/test_conformance/images/clFillImage/test_fill_3D.cpp
index 9db0ac7..298db0e 100644
--- a/test_conformance/images/clFillImage/test_fill_3D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_3D.cpp
@@ -15,6 +15,13 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool               gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch;
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
 // Defined in test_fill_2D_3D.cpp
 extern int test_fill_image_generic( cl_context context, cl_command_queue queue, image_descriptor *imageInfo,
                                    const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d );
diff --git a/test_conformance/images/clFillImage/test_fill_generic.cpp b/test_conformance/images/clFillImage/test_fill_generic.cpp
index 59bf24a..6b59bad 100644
--- a/test_conformance/images/clFillImage/test_fill_generic.cpp
+++ b/test_conformance/images/clFillImage/test_fill_generic.cpp
@@ -15,13 +15,23 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool               gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch;
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern uint64_t           gRoundingStartValue;
 extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, int x, int y, int z, float *outData );
 static void CL_CALLBACK free_pitch_buffer( cl_mem image, void *buf )
     free( buf );
 cl_mem create_image( cl_context context, cl_command_queue queue, BufferOwningPtr<char>& data, image_descriptor *imageInfo, int *error )
     cl_mem img;
@@ -478,16 +488,58 @@
             if (memcmp( sourcePtr, destPtr, scanlineSize ) != 0)
+                log_error( "ERROR: Scanline %d did not verify for image size %d,%d,%d pitch %d (extra %d bytes)\n", (int)y, (int)imageInfo->width, (int)imageInfo->height, (int)thirdDim, (int)imageInfo->rowPitch, (int)imageInfo->rowPitch - (int)imageInfo->width * (int)get_pixel_size( imageInfo->format ) );
                 // Find the first missing pixel
                 size_t pixel_size = get_pixel_size( imageInfo->format );
                 size_t where = 0;
                 for ( where = 0; where < imageInfo->width; where++ )
                     if ( memcmp( sourcePtr + pixel_size * where, destPtr + pixel_size * where, pixel_size) )
+                log_error( "Failed at column: %ld   ", where );
+                switch ( pixel_size )
+                {
+                case 1:
+                    log_error( "*0x%2.2x vs. 0x%2.2x\n", ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[0] );
+                    break;
+                case 2:
+                    log_error( "*0x%4.4x vs. 0x%4.4x\n", ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[0] );
+                    break;
+                case 3:
+                    log_error( "*{0x%2.2x, 0x%2.2x, 0x%2.2x} vs. {0x%2.2x, 0x%2.2x, 0x%2.2x}\n",
+                               ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(sourcePtr + pixel_size * where))[1], ((cl_uchar*)(sourcePtr + pixel_size * where))[2],
+                               ((cl_uchar*)(destPtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[1], ((cl_uchar*)(destPtr + pixel_size * where))[2]
+                             );
+                    break;
+                case 4:
+                    log_error( "*0x%8.8x vs. 0x%8.8x\n", ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[0] );
+                    break;
+                case 6:
+                    log_error( "*{0x%4.4x, 0x%4.4x, 0x%4.4x} vs. {0x%4.4x, 0x%4.4x, 0x%4.4x}\n",
+                               ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(sourcePtr + pixel_size * where))[1], ((cl_ushort*)(sourcePtr + pixel_size * where))[2],
+                               ((cl_ushort*)(destPtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[1], ((cl_ushort*)(destPtr + pixel_size * where))[2]
+                             );
+                    break;
+                case 8:
+                    log_error( "*0x%16.16llx vs. 0x%16.16llx\n", ((cl_ulong*)(sourcePtr + pixel_size * where))[0], ((cl_ulong*)(destPtr + pixel_size * where))[0] );
+                    break;
+                case 12:
+                    log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x} vs. {0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
+                               ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2],
+                               ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2]
+                             );
+                    break;
+                case 16:
+                    log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x} vs. {0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
+                               ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2], ((cl_uint*)(sourcePtr + pixel_size * where))[3],
+                               ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2], ((cl_uint*)(destPtr + pixel_size * where))[3]
+                             );
+                    break;
+                default:
+                    log_error( "Don't know how to print pixel size of %ld\n", pixel_size );
+                    break;
+                }
-                print_first_pixel_difference_error(
-                    where, sourcePtr + pixel_size * where,
-                    destPtr + pixel_size * where, imageInfo, y, thirdDim);
                 return -1;
diff --git a/test_conformance/images/clFillImage/test_loops.cpp b/test_conformance/images/clFillImage/test_loops.cpp
index 759f48d..0a4c571 100644
--- a/test_conformance/images/clFillImage/test_loops.cpp
+++ b/test_conformance/images/clFillImage/test_loops.cpp
@@ -16,7 +16,14 @@
 #include "../testBase.h"
 #include "../common.h"
-extern int gTypesToTest;
+extern bool               gDebugTrace;
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern int                gTypesToTest;
+extern int                gNormalizedModeToUse;
+extern cl_channel_type    gChannelTypeToUse;
+extern cl_channel_order   gChannelOrderToUse;
 extern int test_fill_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType outputType );
 extern int test_fill_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType outputType );
@@ -69,22 +76,35 @@
     int ret = 0;
     // Grab the list of supported image formats
-    std::vector<cl_image_format> formatList;
-    if (get_format_list(context, imageType, formatList, flags)) return -1;
+    cl_image_format *formatList;
+    bool *filterFlags;
+    unsigned int numFormats;
+    if ( get_format_list( context, imageType, formatList, numFormats, flags ) )
+        return -1;
+    filterFlags = new bool[ numFormats ];
+    if ( filterFlags == NULL )
+    {
+        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
+        return -1;
+    }
+    memset( filterFlags, 0, sizeof( bool ) * numFormats );
     for (auto test : imageTestTypes)
         if (gTypesToTest & test.type)
-            std::vector<bool> filterFlags(formatList.size(), false);
-            if (filter_formats(formatList, filterFlags, test.channelTypes) == 0)
+            if (filter_formats(formatList, filterFlags, numFormats,
+                               test.channelTypes)
+                == 0)
                 log_info("No formats supported for %s type\n",;
                 // Run the format list
-                for (unsigned int i = 0; i < formatList.size(); i++)
+                for (unsigned int i = 0; i < numFormats; i++)
                     if (filterFlags[i])
@@ -112,6 +132,9 @@
+    delete[] filterFlags;
+    delete[] formatList;
     return ret;
diff --git a/test_conformance/images/clGetInfo/main.cpp b/test_conformance/images/clGetInfo/main.cpp
index 80b3cbb..cea2ad6 100644
--- a/test_conformance/images/clGetInfo/main.cpp
+++ b/test_conformance/images/clGetInfo/main.cpp
@@ -13,21 +13,30 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "../harness/compat.h"
 #include <stdio.h>
 #include <string.h>
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <sys/time.h>
 #include "../testBase.h"
-#include "../harness/compat.h"
 bool gDebugTrace;
 bool gTestSmallImages;
 bool gTestMaxImages;
+int  gTypesToTest;
 cl_channel_type gChannelTypeToUse = (cl_channel_type)-1;
 cl_channel_order gChannelOrderToUse = (cl_channel_order)-1;
 extern int test_image_set( cl_device_id device, cl_context context, cl_mem_object_type image_type );
 static void printUsage( const char *execName );
 int test_1D(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
     return test_image_set( device, context, CL_MEM_OBJECT_IMAGE1D );
@@ -108,8 +117,7 @@
     if( gTestSmallImages )
         log_info( "Note: Using small test images\n" );
-    int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
-                                      false, 0, verifyImageSupport);
+    int ret = runTestHarness( argCount, argList, test_num, test_list, true, false, 0 );
     return ret;
diff --git a/test_conformance/images/clGetInfo/test_1D.cpp b/test_conformance/images/clGetInfo/test_1D.cpp
index 0d704b8..e1d9206 100644
--- a/test_conformance/images/clGetInfo/test_1D.cpp
+++ b/test_conformance/images/clGetInfo/test_1D.cpp
@@ -15,6 +15,11 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages;
 extern int test_get_image_info_single( cl_context context, image_descriptor *imageInfo, MTdata d, cl_mem_flags flags, size_t row_pitch, size_t slice_pitch );
diff --git a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
index 447fc7c..c250e09 100644
--- a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
+++ b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
@@ -15,6 +15,11 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages;
 extern int test_get_image_info_single( cl_context context, image_descriptor *imageInfo, MTdata d, cl_mem_flags flags, size_t row_pitch, size_t slice_pitch );
 int test_get_image_info_1D_array( cl_device_id device, cl_context context, cl_image_format *format, cl_mem_flags flags )
diff --git a/test_conformance/images/clGetInfo/test_2D.cpp b/test_conformance/images/clGetInfo/test_2D.cpp
index 74a6012..4953707 100644
--- a/test_conformance/images/clGetInfo/test_2D.cpp
+++ b/test_conformance/images/clGetInfo/test_2D.cpp
@@ -15,6 +15,12 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages;
 int test_get_image_info_single( cl_context context, image_descriptor *imageInfo, MTdata d, cl_mem_flags flags, size_t row_pitch, size_t slice_pitch )
     int error;
diff --git a/test_conformance/images/clGetInfo/test_3D.cpp b/test_conformance/images/clGetInfo/test_3D.cpp
index af5062e..4bc189a 100644
--- a/test_conformance/images/clGetInfo/test_3D.cpp
+++ b/test_conformance/images/clGetInfo/test_3D.cpp
@@ -15,6 +15,11 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages;
 extern int test_get_image_info_single( cl_context context, image_descriptor *imageInfo, MTdata d, cl_mem_flags flags, size_t row_pitch, size_t slice_pitch );
 int test_get_image_info_3D( cl_device_id device, cl_context context, cl_image_format *format, cl_mem_flags flags )
diff --git a/test_conformance/images/clGetInfo/test_loops.cpp b/test_conformance/images/clGetInfo/test_loops.cpp
index 17f02d8..e64ec3b 100644
--- a/test_conformance/images/clGetInfo/test_loops.cpp
+++ b/test_conformance/images/clGetInfo/test_loops.cpp
@@ -15,6 +15,18 @@
 #include "../testBase.h"
 #include "../common.h"
+#include "harness/imageHelpers.h"
+#include <algorithm>
+#include <iterator>
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern int                gTypesToTest;
+extern int                gNormalizedModeToUse;
+extern cl_channel_type      gChannelTypeToUse;
+extern bool gDebugTrace;
 extern int test_get_image_info_1D( cl_device_id device, cl_context context, cl_image_format *format, cl_mem_flags flags );
 extern int test_get_image_info_2D( cl_device_id device, cl_context context, cl_image_format *format, cl_mem_flags flags );
@@ -22,6 +34,30 @@
 extern int test_get_image_info_1D_array( cl_device_id device, cl_context context, cl_image_format *format, cl_mem_flags flags );
 extern int test_get_image_info_2D_array( cl_device_id device, cl_context context, cl_image_format *format, cl_mem_flags flags );
+static bool check_minimum_supported(cl_image_format *formatList,
+                                    unsigned int numFormats,
+                                    cl_mem_flags flags,
+                                    cl_mem_object_type image_type,
+                                    cl_device_id device)
+	bool passed = true;
+	Version version = get_device_cl_version(device);
+	std::vector<cl_image_format> formatsToSupport;
+	build_required_image_formats(flags, image_type, device, formatsToSupport);
+	for (auto &format: formatsToSupport)
+	{
+		if( !find_format( formatList, numFormats, &format ) )
+		{
+			log_error( "ERROR: Format required by OpenCL %s is not supported: ", version.to_string().c_str() );
+			print_header( &format, true );
+			passed = false;
+		}
+	}
+	return passed;
 int test_image_type( cl_device_id device, cl_context context, cl_mem_object_type image_type, cl_mem_flags flags )
     log_info( "Running %s %s-only tests...\n", convert_image_type_to_string(image_type), flags == CL_MEM_READ_ONLY ? "read" : "write" );
@@ -29,14 +65,39 @@
     int ret = 0;
     // Grab the list of supported image formats for integer reads
-    std::vector<cl_image_format> formatList;
-    if (get_format_list(context, image_type, formatList, flags)) return -1;
+    cl_image_format *formatList;
+    bool *filterFlags;
+    unsigned int numFormats;
-    std::vector<bool> filterFlags(formatList.size(), false);
-    filter_formats(formatList, filterFlags, nullptr);
+    if ( get_format_list( context, image_type, formatList, numFormats, flags ) )
+        return -1;
+    BufferOwningPtr<cl_image_format> formatListBuf(formatList);
+    if ((image_type == CL_MEM_OBJECT_IMAGE3D) && (flags != CL_MEM_READ_ONLY)) {
+        log_info("No requirement for 3D write in OpenCL 1.2. Not checking formats.\n");
+    } else {
+        log_info("Checking for required OpenCL 1.2 formats.\n");
+        if (check_minimum_supported( formatList, numFormats, flags, image_type, device ) == false) {
+            ret++;
+        } else {
+            log_info("All required formats present.\n");
+        }
+    }
+    filterFlags = new bool[ numFormats ];
+    BufferOwningPtr<bool> filterFlagsBuf(filterFlags);
+    if( filterFlags == NULL )
+    {
+        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
+        return -1;
+    }
+    memset( filterFlags, 0, sizeof( bool ) * numFormats );
+    filter_formats( formatList, filterFlags, numFormats, 0 );
     // Run the format list
-    for (unsigned int i = 0; i < formatList.size(); i++)
+    for( unsigned int i = 0; i < numFormats; i++ )
         int test_return = 0;
         if( filterFlags[i] )
diff --git a/test_conformance/images/clReadWriteImage/main.cpp b/test_conformance/images/clReadWriteImage/main.cpp
index 18c7e23..d8d096e 100644
--- a/test_conformance/images/clReadWriteImage/main.cpp
+++ b/test_conformance/images/clReadWriteImage/main.cpp
@@ -13,20 +13,30 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "../harness/compat.h"
 #include <stdio.h>
 #include <string.h>
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <sys/time.h>
 #include "../testBase.h"
-#include "../harness/compat.h"
 bool gDebugTrace;
 bool gTestSmallImages;
 bool gTestMaxImages;
+bool gUseRamp;
 bool gTestMipmaps;
+int  gTypesToTest;
 cl_channel_type gChannelTypeToUse = (cl_channel_type)-1;
 cl_channel_order gChannelOrderToUse = (cl_channel_order)-1;
 bool            gEnablePitch = false;
 static void printUsage( const char *execName );
 extern int test_image_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_mem_object_type image_type );
@@ -89,6 +99,8 @@
             gTestMaxImages = true;
         else if( strcmp( argv[i], "use_pitches" ) == 0 )
             gEnablePitch = true;
+        else if( strcmp( argv[i], "use_ramps" ) == 0 )
+            gUseRamp = true;
         else if( strcmp( argv[i], "test_mipmaps") == 0 ) {
             gTestMipmaps = true;
             // Don't test pitches with mipmaps right now.
@@ -112,8 +124,7 @@
     if( gTestSmallImages )
         log_info( "Note: Using small test images\n" );
-    int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
-                                      false, 0, verifyImageSupport);
+    int ret = runTestHarness( argCount, argList, test_num, test_list, true, false, 0 );
     return ret;
@@ -131,6 +142,7 @@
     log_info( "\tsmall_images - Runs every format through a loop of widths 1-13 and heights 1-9, instead of random sizes\n" );
     log_info( "\tmax_images - Runs every format through a set of size combinations with the max values, max values - 1, and max values / 128\n" );
     log_info( "\tuse_pitches - Enables row and slice pitches\n" );
+    log_info( "\tuse_ramps - Instead of random data, uses images filled with ramps (and 0xff on any padding pixels) to ease debugging\n" );
     log_info( "\ttest_mipmaps - Test mipmapped images\n" );
     log_info( "\trandomize - Uses random seed\n" );
     log_info( "\n" );
diff --git a/test_conformance/images/clReadWriteImage/test_loops.cpp b/test_conformance/images/clReadWriteImage/test_loops.cpp
index 782e4b3..e8ca8c8 100644
--- a/test_conformance/images/clReadWriteImage/test_loops.cpp
+++ b/test_conformance/images/clReadWriteImage/test_loops.cpp
@@ -16,23 +16,21 @@
 #include "../testBase.h"
 #include "../common.h"
-extern int test_read_image_set_1D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  cl_image_format *format, cl_mem_flags flags);
-extern int test_read_image_set_2D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  cl_image_format *format, cl_mem_flags flags);
-extern int test_read_image_set_3D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  cl_image_format *format, cl_mem_flags flags);
-extern int test_read_image_set_1D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        cl_image_format *format,
-                                        cl_mem_flags flags);
-extern int test_read_image_set_2D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        cl_image_format *format,
-                                        cl_mem_flags flags);
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern int                gTypesToTest;
+extern int                gNormalizedModeToUse;
+extern cl_channel_type      gChannelTypeToUse;
+extern bool gDebugTrace;
+extern bool gTestMipmaps;
+extern int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
 int test_image_type( cl_device_id device, cl_context context, cl_command_queue queue, cl_mem_object_type imageType, cl_mem_flags flags )
@@ -40,76 +38,80 @@
     int ret = 0;
-    if (gTestMipmaps)
-    {
-        if (0 == is_extension_available(device, "cl_khr_mipmap_image"))
-        {
-            log_info("-----------------------------------------------------\n");
-            log_info("This device does not support "
-                     "cl_khr_mipmap_image.\nSkipping mipmapped image test. \n");
-            log_info(
-                "-----------------------------------------------------\n\n");
-            return 0;
-        }
-    }
     // Grab the list of supported image formats for integer reads
-    std::vector<cl_image_format> formatList;
-    if (get_format_list(context, imageType, formatList, flags)) return -1;
+    cl_image_format *formatList;
+    bool *filterFlags;
+    unsigned int numFormats;
-    std::vector<bool> filterFlags(formatList.size(), false);
-    filter_formats(formatList, filterFlags, nullptr);
+  if ( gTestMipmaps )
+  {
+    if ( 0 == is_extension_available( device, "cl_khr_mipmap_image" ))
+    {
+      log_info( "-----------------------------------------------------\n" );
+      log_info( "This device does not support cl_khr_mipmap_image.\nSkipping mipmapped image test. \n" );
+      log_info( "-----------------------------------------------------\n\n" );
+      return 0;
+    }
+  }
+    if( get_format_list( context, imageType, formatList, numFormats, flags ) )
+        return -1;
+    filterFlags = new bool[ numFormats ];
+    if( filterFlags == NULL )
+    {
+        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
+        return -1;
+    }
+    memset( filterFlags, 0, sizeof( bool ) * numFormats );
+    filter_formats( formatList, filterFlags, numFormats, 0 );
     // Run the format list
-    for (unsigned int i = 0; i < formatList.size(); i++)
+    for( unsigned int i = 0; i < numFormats; i++ )
         int test_return = 0;
-        if (filterFlags[i])
+        if( filterFlags[i] )
-            log_info("NOT RUNNING: ");
-            print_header(&formatList[i], false);
+            log_info( "NOT RUNNING: " );
+            print_header( &formatList[ i ], false );
-        print_header(&formatList[i], false);
+        print_header( &formatList[ i ], false );
-        switch (imageType)
-        {
+        switch (imageType) {
             case CL_MEM_OBJECT_IMAGE1D:
-                test_return = test_read_image_set_1D(device, context, queue,
-                                                     &formatList[i], flags);
+                test_return = test_read_image_set_1D( device, context, queue, &formatList[ i ] );
             case CL_MEM_OBJECT_IMAGE2D:
-                test_return = test_read_image_set_2D(device, context, queue,
-                                                     &formatList[i], flags);
+                test_return = test_read_image_set_2D( device, context, queue, &formatList[ i ] );
             case CL_MEM_OBJECT_IMAGE3D:
-                test_return = test_read_image_set_3D(device, context, queue,
-                                                     &formatList[i], flags);
+                test_return = test_read_image_set_3D( device,context, queue,  &formatList[ i ] );
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                test_return = test_read_image_set_1D_array(
-                    device, context, queue, &formatList[i], flags);
+                test_return = test_read_image_set_1D_array( device, context, queue, &formatList[ i ] );
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                test_return = test_read_image_set_2D_array(
-                    device, context, queue, &formatList[i], flags);
+                test_return = test_read_image_set_2D_array( device, context, queue, &formatList[ i ] );
-        if (test_return)
-        {
+        if (test_return) {
-            log_error("FAILED: ");
-            print_header(&formatList[i], true);
-            log_info("\n");
+            log_error( "FAILED: " );
+            print_header( &formatList[ i ], true );
+            log_info( "\n" );
         ret += test_return;
+    delete[] filterFlags;
+    delete[] formatList;
     return ret;
diff --git a/test_conformance/images/clReadWriteImage/test_read_1D.cpp b/test_conformance/images/clReadWriteImage/test_read_1D.cpp
index eef5bf4..7d9eb84 100644
--- a/test_conformance/images/clReadWriteImage/test_read_1D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_1D.cpp
@@ -15,9 +15,16 @@
 #include "../testBase.h"
-int test_read_image_1D(cl_context context, cl_command_queue queue,
-                       image_descriptor *imageInfo, MTdata d,
-                       cl_mem_flags flags)
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+int test_read_image_1D( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
     int error;
@@ -36,14 +43,12 @@
     // Construct testing sources
-      image = create_image_1d(context, flags, imageInfo->format,
-                              imageInfo->width, 0, NULL, NULL, &error);
-      if (image == NULL)
-      {
-          log_error("ERROR: Unable to create 1D image of size %d (%s)",
-                    (int)imageInfo->width, IGetErrorString(error));
-          return -1;
-      }
+    image = create_image_1d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, 0, NULL, NULL, &error );
+    if( image == NULL )
+    {
+      log_error( "ERROR: Unable to create 1D image of size %d (%s)", (int)imageInfo->width, IGetErrorString( error ) );
+      return -1;
+    }
@@ -52,8 +57,7 @@
     image_desc.image_width = imageInfo->width;
     image_desc.num_mip_levels = imageInfo->num_mip_levels;
-    image = clCreateImage(context, flags, imageInfo->format, &image_desc, NULL,
-                          &error);
+    image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
     if( error != CL_SUCCESS )
       log_error( "ERROR: Unable to create %d level mipmapped 1D image of size %d x %d (pitch %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
@@ -163,9 +167,7 @@
     return 0;
-int test_read_image_set_1D(cl_device_id device, cl_context context,
-                           cl_command_queue queue, cl_image_format *format,
-                           cl_mem_flags flags)
+int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
     size_t maxWidth;
     cl_ulong maxAllocSize, memSize;
@@ -199,8 +201,7 @@
             if( gDebugTrace )
                 log_info( "   at size %d\n", (int)imageInfo.width );
-            int ret =
-                test_read_image_1D(context, queue, &imageInfo, seed, flags);
+            int ret = test_read_image_1D( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
@@ -224,7 +225,7 @@
             log_info("Testing %d\n", (int)imageInfo.width);
             if( gDebugTrace )
                 log_info( "   at max size %d\n", (int)maxWidth );
-            if (test_read_image_1D(context, queue, &imageInfo, seed, flags))
+            if( test_read_image_1D( context, queue, &imageInfo, seed ) )
                 return -1;
@@ -260,8 +261,7 @@
             if( gDebugTrace )
                 log_info( "   at size %d (row pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth );
-            int ret =
-                test_read_image_1D(context, queue, &imageInfo, seed, flags);
+            int ret = test_read_image_1D( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
diff --git a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
index 5d5c288..3f84556 100644
--- a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
@@ -15,9 +15,16 @@
 #include "../testBase.h"
-int test_read_image_1D_array(cl_context context, cl_command_queue queue,
-                             image_descriptor *imageInfo, MTdata d,
-                             cl_mem_flags flags)
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+int test_read_image_1D_array( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
     int error;
@@ -37,9 +44,7 @@
     // Construct testing sources
-        image = create_image_1d_array(context, flags, imageInfo->format,
-                                      imageInfo->width, imageInfo->arraySize, 0,
-                                      0, NULL, &error);
+        image = create_image_1d_array( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->arraySize, 0, 0, NULL, &error );
         if( image == NULL )
             log_error( "ERROR: Unable to create 1D image array of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->arraySize, IGetErrorString( error ) );
@@ -54,8 +59,7 @@
         image_desc.image_array_size = imageInfo->arraySize;
         image_desc.num_mip_levels = imageInfo->num_mip_levels;
-        image = clCreateImage(context, flags, imageInfo->format, &image_desc,
-                              NULL, &error);
+        image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
         if( error != CL_SUCCESS )
             log_error( "ERROR: Unable to create %d level mipmapped 1D image of width %d and array size %d (pitch %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, IGetErrorString( error ) );
@@ -169,9 +173,7 @@
     return 0;
-int test_read_image_set_1D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 cl_image_format *format, cl_mem_flags flags)
+int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
     size_t maxWidth, maxArraySize;
     cl_ulong maxAllocSize, memSize;
@@ -208,8 +210,7 @@
                 if( gDebugTrace )
                     log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize );
-                int ret = test_read_image_1D_array(context, queue, &imageInfo,
-                                                   seed, flags);
+                int ret = test_read_image_1D_array( context, queue, &imageInfo, seed );
                 if( ret )
                     return -1;
@@ -236,8 +237,7 @@
             log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.arraySize);
             if( gDebugTrace )
                 log_info( "   at max size %d,%d\n", (int)maxWidth, (int)maxArraySize );
-            if (test_read_image_1D_array(context, queue, &imageInfo, seed,
-                                         flags))
+            if( test_read_image_1D_array( context, queue, &imageInfo, seed ) )
                 return -1;
@@ -275,8 +275,7 @@
             if( gDebugTrace )
                 log_info( "   at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize );
-            int ret = test_read_image_1D_array(context, queue, &imageInfo, seed,
-                                               flags);
+            int ret = test_read_image_1D_array( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
diff --git a/test_conformance/images/clReadWriteImage/test_read_2D.cpp b/test_conformance/images/clReadWriteImage/test_read_2D.cpp
index fb2e794..0eae51b 100644
--- a/test_conformance/images/clReadWriteImage/test_read_2D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_2D.cpp
@@ -15,9 +15,16 @@
 #include "../testBase.h"
-int test_read_image_2D(cl_context context, cl_command_queue queue,
-                       image_descriptor *imageInfo, MTdata d,
-                       cl_mem_flags flags)
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+int test_read_image_2D( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
     int error;
@@ -37,9 +44,7 @@
     // Construct testing sources
-        image =
-            create_image_2d(context, flags, imageInfo->format, imageInfo->width,
-                            imageInfo->height, 0, NULL, &error);
+        image = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, 0, NULL, &error );
         if( image == NULL )
             log_error( "ERROR: Unable to create 2D image of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, IGetErrorString( error ) );
@@ -54,8 +59,7 @@
         image_desc.image_height = imageInfo->height;
         image_desc.num_mip_levels = imageInfo->num_mip_levels;
-        image = clCreateImage(context, flags, imageInfo->format, &image_desc,
-                              NULL, &error);
+        image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
         if( error != CL_SUCCESS )
             log_error( "ERROR: Unable to create %d level mipmapped 2D image of size %d x %d (pitch %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
@@ -172,9 +176,7 @@
     return 0;
-int test_read_image_set_2D(cl_device_id device, cl_context context,
-                           cl_command_queue queue, cl_image_format *format,
-                           cl_mem_flags flags)
+int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
     size_t maxWidth, maxHeight;
     cl_ulong maxAllocSize, memSize;
@@ -210,8 +212,7 @@
                 if( gDebugTrace )
                     log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
-                int ret =
-                    test_read_image_2D(context, queue, &imageInfo, seed, flags);
+                int ret = test_read_image_2D( context, queue, &imageInfo, seed );
                 if( ret )
                     return -1;
@@ -237,7 +238,7 @@
             log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.height);
             if( gDebugTrace )
                 log_info( "   at max size %d,%d\n", (int)maxWidth, (int)maxHeight );
-            if (test_read_image_2D(context, queue, &imageInfo, seed, flags))
+            if( test_read_image_2D( context, queue, &imageInfo, seed ) )
                 return -1;
@@ -273,8 +274,7 @@
             if( gDebugTrace )
                 log_info( "   at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
-            int ret =
-                test_read_image_2D(context, queue, &imageInfo, seed, flags);
+            int ret = test_read_image_2D( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
diff --git a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
index d0113bb..547e5eb 100644
--- a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
@@ -15,9 +15,14 @@
 #include "../testBase.h"
-int test_read_image_2D_array(cl_context context, cl_command_queue queue,
-                             image_descriptor *imageInfo, MTdata d,
-                             cl_mem_flags flags)
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+int test_read_image_2D_array( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
     int error;
@@ -37,9 +42,7 @@
     // Construct testing sources
-        image = create_image_2d_array(context, flags, imageInfo->format,
-                                      imageInfo->width, imageInfo->height,
-                                      imageInfo->arraySize, 0, 0, NULL, &error);
+        image = create_image_2d_array( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0, NULL, &error );
         if( image == NULL )
             log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, IGetErrorString( error ) );
@@ -55,8 +58,7 @@
         image_desc.image_array_size = imageInfo->arraySize;
         image_desc.num_mip_levels = imageInfo->num_mip_levels;
-        image = clCreateImage(context, flags, imageInfo->format, &image_desc,
-                              NULL, &error);
+        image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
         if( error != CL_SUCCESS )
             log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %d x %d x %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
@@ -147,9 +149,7 @@
     return 0;
-int test_read_image_set_2D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 cl_image_format *format, cl_mem_flags flags)
+int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
     size_t maxWidth, maxHeight, maxArraySize;
     cl_ulong maxAllocSize, memSize;
@@ -188,8 +188,7 @@
                     if( gDebugTrace )
                         log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize );
-                    int ret = test_read_image_2D_array(context, queue,
-                                                       &imageInfo, seed, flags);
+                    int ret = test_read_image_2D_array( context, queue, &imageInfo, seed );
                     if( ret )
                         return -1;
@@ -217,8 +216,7 @@
                 imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, 0), seed);
             log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize);
-            if (test_read_image_2D_array(context, queue, &imageInfo, seed,
-                                         flags))
+            if( test_read_image_2D_array( context, queue, &imageInfo, seed ) )
                 return -1;
@@ -262,8 +260,7 @@
             if( gDebugTrace )
                 log_info( "   at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxArraySize );
-            int ret = test_read_image_2D_array(context, queue, &imageInfo, seed,
-                                               flags);
+            int ret = test_read_image_2D_array( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
diff --git a/test_conformance/images/clReadWriteImage/test_read_3D.cpp b/test_conformance/images/clReadWriteImage/test_read_3D.cpp
index 2dcd243..16baeeb 100644
--- a/test_conformance/images/clReadWriteImage/test_read_3D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_3D.cpp
@@ -15,9 +15,14 @@
 #include "../testBase.h"
-int test_read_image_3D(cl_context context, cl_command_queue queue,
-                       image_descriptor *imageInfo, MTdata d,
-                       cl_mem_flags flags)
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+int test_read_image_3D( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
     int error;
@@ -36,9 +41,7 @@
     // Construct testing sources
-        image = create_image_3d(context, flags, imageInfo->format,
-                                imageInfo->width, imageInfo->height,
-                                imageInfo->depth, 0, 0, NULL, &error);
+        image = create_image_3d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0, NULL, &error );
         if( image == NULL )
             log_error( "ERROR: Unable to create 2D image of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, IGetErrorString( error ) );
@@ -54,8 +57,7 @@
         image_desc.image_depth = imageInfo->depth;
         image_desc.num_mip_levels = imageInfo->num_mip_levels;
-        image = clCreateImage(context, flags, imageInfo->format, &image_desc,
-                              NULL, &error);
+        image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
         if( error != CL_SUCCESS )
             log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %d x %d x %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
@@ -152,9 +154,7 @@
     return 0;
-int test_read_image_set_3D(cl_device_id device, cl_context context,
-                           cl_command_queue queue, cl_image_format *format,
-                           cl_mem_flags flags)
+int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
     size_t maxWidth, maxHeight, maxDepth;
     cl_ulong maxAllocSize, memSize;
@@ -193,8 +193,7 @@
                     if( gDebugTrace )
                         log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth );
-                    int ret = test_read_image_3D(context, queue, &imageInfo,
-                                                 seed, flags);
+                    int ret = test_read_image_3D( context, queue, &imageInfo, seed );
                     if( ret )
                         return -1;
@@ -222,8 +221,8 @@
         imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth), seed);
       log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth);
-      if (test_read_image_3D(context, queue, &imageInfo, seed, flags))
-          return -1;
+      if( test_read_image_3D( context, queue, &imageInfo, seed ) )
+        return -1;
@@ -265,8 +264,7 @@
             if( gDebugTrace )
                 log_info( "   at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxDepth );
-            int ret =
-                test_read_image_3D(context, queue, &imageInfo, seed, flags);
+            int ret = test_read_image_3D( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
diff --git a/test_conformance/images/common.cpp b/test_conformance/images/common.cpp
index 7323f11..9e54261 100644
--- a/test_conformance/images/common.cpp
+++ b/test_conformance/images/common.cpp
@@ -58,13 +58,27 @@
     { kTestFloat, kFloat, floatFormats, "float" },
 } };
-int filter_formats(const std::vector<cl_image_format> &formatList,
-                   std::vector<bool> &filterFlags,
+const char *convert_image_type_to_string(cl_mem_object_type image_type)
+    switch (image_type)
+    {
+        case CL_MEM_OBJECT_IMAGE1D: return "1D";
+        case CL_MEM_OBJECT_IMAGE2D: return "2D";
+        case CL_MEM_OBJECT_IMAGE3D: return "3D";
+        case CL_MEM_OBJECT_IMAGE1D_ARRAY: return "1D array";
+        case CL_MEM_OBJECT_IMAGE2D_ARRAY: return "2D array";
+        case CL_MEM_OBJECT_IMAGE1D_BUFFER: return "1D image buffer";
+        default: return "unrecognized object type";
+    }
+int filter_formats(cl_image_format *formatList, bool *filterFlags,
+                   unsigned int formatCount,
                    cl_channel_type *channelDataTypesToFilter,
                    bool testMipmaps /*=false*/)
     int numSupported = 0;
-    for (unsigned int j = 0; j < formatList.size(); j++)
+    for (unsigned int j = 0; j < formatCount; j++)
         // If this format has been previously filtered, remove the filter
         if (filterFlags[j]) filterFlags[j] = false;
@@ -129,18 +143,18 @@
 int get_format_list(cl_context context, cl_mem_object_type imageType,
-                    std::vector<cl_image_format> &outFormatList,
-                    cl_mem_flags flags)
+                    cl_image_format *&outFormatList,
+                    unsigned int &outFormatCount, cl_mem_flags flags)
-    cl_uint formatCount;
     int error = clGetSupportedImageFormats(context, flags, imageType, 0, NULL,
-                                           &formatCount);
+                                           &outFormatCount);
     test_error(error, "Unable to get count of supported image formats");
-    outFormatList.resize(formatCount);
+    outFormatList =
+        (outFormatCount > 0) ? new cl_image_format[outFormatCount] : NULL;
-    error = clGetSupportedImageFormats(context, flags, imageType, formatCount,
-                             , NULL);
+    error = clGetSupportedImageFormats(context, flags, imageType,
+                                       outFormatCount, outFormatList, NULL);
     test_error(error, "Unable to get list of supported image formats");
     return 0;
diff --git a/test_conformance/images/common.h b/test_conformance/images/common.h
index 27e8679..114623e 100644
--- a/test_conformance/images/common.h
+++ b/test_conformance/images/common.h
@@ -22,7 +22,6 @@
 #include "harness/conversions.h"
 #include <array>
-#include <vector>
 extern cl_channel_type gChannelTypeToUse;
 extern cl_channel_order gChannelOrderToUse;
@@ -41,13 +40,14 @@
 extern std::array<ImageTestTypes, 3> imageTestTypes;
-int filter_formats(const std::vector<cl_image_format> &formatList,
-                   std::vector<bool> &filterFlags,
+const char *convert_image_type_to_string(cl_mem_object_type imageType);
+int filter_formats(cl_image_format *formatList, bool *filterFlags,
+                   unsigned int formatCount,
                    cl_channel_type *channelDataTypesToFilter,
                    bool testMipmaps = false);
 int get_format_list(cl_context context, cl_mem_object_type imageType,
-                    std::vector<cl_image_format> &outFormatList,
-                    cl_mem_flags flags);
+                    cl_image_format *&outFormatList,
+                    unsigned int &outFormatCount, cl_mem_flags flags);
 size_t random_in_ranges(size_t minimum, size_t rangeA, size_t rangeB, MTdata d);
 #endif // IMAGES_COMMON_H
diff --git a/test_conformance/images/kernel_image_methods/main.cpp b/test_conformance/images/kernel_image_methods/main.cpp
index 50653ef..ef6bd2c 100644
--- a/test_conformance/images/kernel_image_methods/main.cpp
+++ b/test_conformance/images/kernel_image_methods/main.cpp
@@ -13,16 +13,24 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "../harness/compat.h"
+#include "../harness/parseParameters.h"
 #include <stdio.h>
 #include <string.h>
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <sys/time.h>
 #include "../testBase.h"
-#include "../harness/compat.h"
-#include "../harness/parseParameters.h"
 bool gDebugTrace;
 bool gTestSmallImages;
 bool gTestMaxImages;
+int  gTypesToTest;
+bool gDeviceLt20 = false;
 cl_channel_type gChannelTypeToUse = (cl_channel_type)-1;
 cl_channel_order gChannelOrderToUse = (cl_channel_order)-1;
@@ -31,6 +39,8 @@
 static void printUsage( const char *execName );
 int test_1D(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
     return test_image_set( device, context, queue, CL_MEM_OBJECT_IMAGE1D );
@@ -111,8 +121,7 @@
     if( gTestSmallImages )
         log_info( "Note: Using small test images\n" );
-    int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
-                                      false, 0, verifyImageSupport);
+    int ret = runTestHarness( argCount, argList, test_num, test_list, true, false, 0 );
     return ret;
diff --git a/test_conformance/images/kernel_image_methods/test_1D.cpp b/test_conformance/images/kernel_image_methods/test_1D.cpp
index 0059d4c..757a4a0 100644
--- a/test_conformance/images/kernel_image_methods/test_1D.cpp
+++ b/test_conformance/images/kernel_image_methods/test_1D.cpp
@@ -15,6 +15,10 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages, gDeviceLt20;
 struct image_kernel_data
@@ -26,28 +30,24 @@
 static const char *methodTest1DImageKernelPattern =
-    "typedef struct {\n"
-    "    int width;\n"
-    "    int channelType;\n"
-    "    int channelOrder;\n"
-    "    int expectedChannelType;\n"
-    "    int expectedChannelOrder;\n"
-    " } image_kernel_data;\n"
-    "__kernel void sample_kernel( %s image1d_t input, __global "
-    "image_kernel_data *outData )\n"
-    "{\n"
-    "   outData->width = get_image_width( input );\n"
-    "   outData->channelType = get_image_channel_data_type( input );\n"
-    "   outData->channelOrder = get_image_channel_order( input );\n"
-    "\n"
-    "   outData->expectedChannelType = %s;\n"
-    "   outData->expectedChannelOrder = %s;\n"
-    "}";
+"typedef struct {\n"
+"    int width;\n"
+"    int channelType;\n"
+"    int channelOrder;\n"
+"    int expectedChannelType;\n"
+"    int expectedChannelOrder;\n"
+" } image_kernel_data;\n"
+"__kernel void sample_kernel( read_only image1d_t input, __global image_kernel_data *outData )\n"
+"   outData->width = get_image_width( input );\n"
+"   outData->channelType = get_image_channel_data_type( input );\n"
+"   outData->channelOrder = get_image_channel_order( input );\n"
+"   outData->expectedChannelType = %s;\n"
+"   outData->expectedChannelOrder = %s;\n"
-static int test_get_1Dimage_info_single(cl_context context,
-                                        cl_command_queue queue,
-                                        image_descriptor *imageInfo, MTdata d,
-                                        cl_mem_flags flags)
+static int test_get_1Dimage_info_single( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
     int error = 0;
@@ -65,9 +65,7 @@
     // Construct testing source
     if( gDebugTrace )
         log_info( " - Creating 1D image %d ...\n", (int)imageInfo->width );
-    image = create_image_1d(context, flags, imageInfo->format, imageInfo->width,
-                            0, NULL, NULL, &error);
+    image = create_image_1d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, 0, NULL, NULL, &error );
     if( image == NULL )
         log_error( "ERROR: Unable to create 1D image of size %d (%s)", (int)imageInfo->width, IGetErrorString( error ) );
@@ -79,8 +77,6 @@
     const char* channelTypeName = GetChannelTypeName( imageInfo->format->image_channel_data_type );
     const char* channelOrderName = GetChannelOrderName( imageInfo->format->image_channel_order );
-    const char *image_access_qualifier =
-        (flags == CL_MEM_READ_ONLY) ? "read_only" : "write_only";
     if(channelTypeName && strlen(channelTypeName))
         sprintf(channelTypeConstantString, "CLK_%s", &channelTypeName[3]);  // replace CL_* with CLK_*
@@ -89,7 +85,7 @@
         sprintf(channelOrderConstantString, "CLK_%s", &channelOrderName[3]); // replace CL_* with CLK_*
     // Create a program to run against
-    sprintf(programSrc, methodTest1DImageKernelPattern, image_access_qualifier,
+    sprintf( programSrc, methodTest1DImageKernelPattern,
             channelTypeConstantString, channelOrderConstantString);
     //log_info("-----------------------------------\n%s\n", programSrc);
@@ -97,13 +93,11 @@
     if (error)
         print_error(error, "clFinish failed.\n");
     const char *ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create kernel to test against" );
     // Create an output buffer
-    outDataBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                   sizeof(outKernelData), NULL, &error);
+    outDataBuffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outKernelData ), NULL, &error );
     test_error( error, "Unable to create output buffer" );
     // Set up arguments and run
@@ -149,9 +143,7 @@
     return error;
-int test_get_image_info_1D(cl_device_id device, cl_context context,
-                           cl_command_queue queue, cl_image_format *format,
-                           cl_mem_flags flags)
+int test_get_image_info_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
     size_t maxWidth;
     cl_ulong maxAllocSize, memSize;
@@ -181,8 +173,7 @@
             if( gDebugTrace )
                 log_info( "   at size %d\n", (int)imageInfo.width );
-            int ret = test_get_1Dimage_info_single(context, queue, &imageInfo,
-                                                   seed, flags);
+            int ret = test_get_1Dimage_info_single( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
@@ -203,8 +194,7 @@
             log_info( "Testing %d\n", (int)sizes[ idx ][ 0 ]);
             if( gDebugTrace )
                 log_info( "   at max size %d\n", (int)sizes[ idx ][ 0 ] );
-            if (test_get_1Dimage_info_single(context, queue, &imageInfo, seed,
-                                             flags))
+            if( test_get_1Dimage_info_single( context, queue, &imageInfo, seed ) )
                 return -1;
@@ -233,8 +223,7 @@
             if( gDebugTrace )
                 log_info( "   at size %d (row pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth );
-            int ret = test_get_1Dimage_info_single(context, queue, &imageInfo,
-                                                   seed, flags);
+            int ret = test_get_1Dimage_info_single( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
diff --git a/test_conformance/images/kernel_image_methods/test_1D_array.cpp b/test_conformance/images/kernel_image_methods/test_1D_array.cpp
index 797161c..f5e778b 100644
--- a/test_conformance/images/kernel_image_methods/test_1D_array.cpp
+++ b/test_conformance/images/kernel_image_methods/test_1D_array.cpp
@@ -15,6 +15,10 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages, gDeviceLt20;
 struct image_kernel_data
@@ -27,30 +31,26 @@
 static const char *methodTestKernelPattern =
-    "typedef struct {\n"
-    "    int width;\n"
-    "    int arraySize;\n"
-    "    int channelType;\n"
-    "    int channelOrder;\n"
-    "    int expectedChannelType;\n"
-    "    int expectedChannelOrder;\n"
-    " } image_kernel_data;\n"
-    "__kernel void sample_kernel( %s image1d_array_t input, __global "
-    "image_kernel_data *outData )\n"
-    "{\n"
-    "   outData->width = get_image_width( input );\n"
-    "   outData->arraySize = get_image_array_size( input );\n"
-    "   outData->channelType = get_image_channel_data_type( input );\n"
-    "   outData->channelOrder = get_image_channel_order( input );\n"
-    "\n"
-    "   outData->expectedChannelType = %s;\n"
-    "   outData->expectedChannelOrder = %s;\n"
-    "}";
+"typedef struct {\n"
+"    int width;\n"
+"    int arraySize;\n"
+"    int channelType;\n"
+"    int channelOrder;\n"
+"    int expectedChannelType;\n"
+"    int expectedChannelOrder;\n"
+" } image_kernel_data;\n"
+"__kernel void sample_kernel( read_only image1d_array_t input, __global image_kernel_data *outData )\n"
+"   outData->width = get_image_width( input );\n"
+"   outData->arraySize = get_image_array_size( input );\n"
+"   outData->channelType = get_image_channel_data_type( input );\n"
+"   outData->channelOrder = get_image_channel_order( input );\n"
+"   outData->expectedChannelType = %s;\n"
+"   outData->expectedChannelOrder = %s;\n"
-int test_get_1Dimage_array_info_single(cl_context context,
-                                       cl_command_queue queue,
-                                       image_descriptor *imageInfo, MTdata d,
-                                       cl_mem_flags flags)
+int test_get_1Dimage_array_info_single( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
     int error = 0;
@@ -69,9 +69,7 @@
     if( gDebugTrace )
         log_info( " - Creating 1D image array %d by %d...\n", (int)imageInfo->width, (int)imageInfo->arraySize );
-    image = create_image_1d_array(context, flags, imageInfo->format,
-                                  imageInfo->width, imageInfo->arraySize, 0, 0,
-                                  NULL, &error);
+    image = create_image_1d_array( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->arraySize, 0, 0, NULL, &error );
     if( image == NULL )
         log_error( "ERROR: Unable to create 1D image array of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->arraySize, IGetErrorString( error ) );
@@ -83,8 +81,6 @@
     const char* channelTypeName = GetChannelTypeName( imageInfo->format->image_channel_data_type );
     const char* channelOrderName = GetChannelOrderName( imageInfo->format->image_channel_order );
-    const char *image_access_qualifier =
-        (flags == CL_MEM_READ_ONLY) ? "read_only" : "write_only";
     if(channelTypeName && strlen(channelTypeName))
         sprintf(channelTypeConstantString, "CLK_%s", &channelTypeName[3]);  // replace CL_* with CLK_*
@@ -93,7 +89,7 @@
         sprintf(channelOrderConstantString, "CLK_%s", &channelOrderName[3]); // replace CL_* with CLK_*
     // Create a program to run against
-    sprintf(programSrc, methodTestKernelPattern, image_access_qualifier,
+    sprintf( programSrc, methodTestKernelPattern,
             channelTypeConstantString, channelOrderConstantString);
     //log_info("-----------------------------------\n%s\n", programSrc);
@@ -101,13 +97,11 @@
     if (error)
         print_error(error, "clFinish failed.\n");
     const char *ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create kernel to test against" );
     // Create an output buffer
-    outDataBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                   sizeof(outKernelData), NULL, &error);
+    outDataBuffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outKernelData ), NULL, &error );
     test_error( error, "Unable to create output buffer" );
     // Set up arguments and run
@@ -158,9 +152,7 @@
     return error;
-int test_get_image_info_1D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 cl_image_format *format, cl_mem_flags flags)
+int test_get_image_info_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
     size_t maxWidth, maxArraySize;
     cl_ulong maxAllocSize, memSize;
@@ -194,8 +186,7 @@
                 if( gDebugTrace )
                     log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize );
-                int ret = test_get_1Dimage_array_info_single(
-                    context, queue, &imageInfo, seed, flags);
+                int ret = test_get_1Dimage_array_info_single( context, queue, &imageInfo, seed );
                 if( ret )
                     return -1;
@@ -219,8 +210,7 @@
             log_info( "Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 2 ]);
             if( gDebugTrace )
                 log_info( "   at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 2 ] );
-            if (test_get_1Dimage_array_info_single(context, queue, &imageInfo,
-                                                   seed, flags))
+            if( test_get_1Dimage_array_info_single( context, queue, &imageInfo, seed ) )
                 return -1;
@@ -252,8 +242,7 @@
             if( gDebugTrace )
                 log_info( "   at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize );
-            int ret = test_get_1Dimage_array_info_single(
-                context, queue, &imageInfo, seed, flags);
+            int ret = test_get_1Dimage_array_info_single( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
diff --git a/test_conformance/images/kernel_image_methods/test_2D.cpp b/test_conformance/images/kernel_image_methods/test_2D.cpp
index b0d4a70..64b9f26 100644
--- a/test_conformance/images/kernel_image_methods/test_2D.cpp
+++ b/test_conformance/images/kernel_image_methods/test_2D.cpp
@@ -15,6 +15,10 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages, gDeviceLt20;
 struct image_kernel_data
@@ -31,42 +35,38 @@
 static const char *methodTestKernelPattern =
-    "typedef struct {\n"
-    "    int width;\n"
-    "    int height;\n"
-    "    int depth;\n"
-    "    int widthDim;\n"
-    "    int heightDim;\n"
-    "    int depthDim;\n"
-    "    int channelType;\n"
-    "    int channelOrder;\n"
-    "    int expectedChannelType;\n"
-    "    int expectedChannelOrder;\n"
-    " } image_kernel_data;\n"
-    " %s\n"
-    "__kernel void sample_kernel( %s image%dd%s_t input, __global "
-    "image_kernel_data *outData )\n"
-    "{\n"
-    "   outData->width = get_image_width( input );\n"
-    "   outData->height = get_image_height( input );\n"
-    "%s\n"
-    "   int%d dim = get_image_dim( input );\n"
-    "   outData->widthDim = dim.x;\n"
-    "   outData->heightDim = dim.y;\n"
-    "%s\n"
-    "   outData->channelType = get_image_channel_data_type( input );\n"
-    "   outData->channelOrder = get_image_channel_order( input );\n"
-    "\n"
-    "   outData->expectedChannelType = %s;\n"
-    "   outData->expectedChannelOrder = %s;\n"
-    "}";
+"typedef struct {\n"
+"    int width;\n"
+"    int height;\n"
+"    int depth;\n"
+"    int widthDim;\n"
+"    int heightDim;\n"
+"    int depthDim;\n"
+"    int channelType;\n"
+"    int channelOrder;\n"
+"    int expectedChannelType;\n"
+"    int expectedChannelOrder;\n"
+" } image_kernel_data;\n"
+"__kernel void sample_kernel( read_only image%dd%s_t input, __global image_kernel_data *outData )\n"
+"   outData->width = get_image_width( input );\n"
+"   outData->height = get_image_height( input );\n"
+"   int%d dim = get_image_dim( input );\n"
+"   outData->widthDim = dim.x;\n"
+"   outData->heightDim = dim.y;\n"
+"   outData->channelType = get_image_channel_data_type( input );\n"
+"   outData->channelOrder = get_image_channel_order( input );\n"
+"   outData->expectedChannelType = %s;\n"
+"   outData->expectedChannelOrder = %s;\n"
 static const char *depthKernelLine = "   outData->depth = get_image_depth( input );\n";
 static const char *depthDimKernelLine = "   outData->depthDim = dim.z;\n";
-int test_get_image_info_single(cl_context context, cl_command_queue queue,
-                               image_descriptor *imageInfo, MTdata d,
-                               cl_mem_flags flags)
+int test_get_image_info_single( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
     int error = 0;
@@ -86,13 +86,9 @@
         log_info( " - Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height );
     if( imageInfo->depth != 0 )
-        image = create_image_3d(context, flags, imageInfo->format,
-                                imageInfo->width, imageInfo->height,
-                                imageInfo->depth, 0, 0, NULL, &error);
+        image = create_image_3d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0, NULL, &error );
-        image =
-            create_image_2d(context, flags, imageInfo->format, imageInfo->width,
-                            imageInfo->height, 0, NULL, &error);
+        image = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, 0, NULL, &error );
     if( image == NULL )
         log_error( "ERROR: Unable to create image of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, IGetErrorString( error ) );
@@ -104,12 +100,6 @@
     const char* channelTypeName = GetChannelTypeName( imageInfo->format->image_channel_data_type );
     const char* channelOrderName = GetChannelOrderName( imageInfo->format->image_channel_order );
-    const char *image_access_qualifier =
-        (flags == CL_MEM_READ_ONLY) ? "read_only" : "write_only";
-    const char *cl_khr_3d_image_writes_enabler = "";
-    if ((flags != CL_MEM_READ_ONLY) && (imageInfo->depth != 0))
-        cl_khr_3d_image_writes_enabler =
-            "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable";
     if(channelTypeName && strlen(channelTypeName))
         sprintf(channelTypeConstantString, "CLK_%s", &channelTypeName[3]);  // replace CL_* with CLK_*
@@ -118,13 +108,12 @@
         sprintf(channelOrderConstantString, "CLK_%s", &channelOrderName[3]); // replace CL_* with CLK_*
     // Create a program to run against
-    sprintf(programSrc, methodTestKernelPattern, cl_khr_3d_image_writes_enabler,
-            image_access_qualifier, (imageInfo->depth != 0) ? 3 : 2,
-            (imageInfo->format->image_channel_order == CL_DEPTH) ? "_depth"
-                                                                 : "",
-            (imageInfo->depth != 0) ? depthKernelLine : "",
-            (imageInfo->depth != 0) ? 4 : 2,
-            (imageInfo->depth != 0) ? depthDimKernelLine : "",
+    sprintf( programSrc, methodTestKernelPattern,
+            ( imageInfo->depth != 0 ) ? 3 : 2,
+            (imageInfo->format->image_channel_order == CL_DEPTH) ? "_depth" : "",
+            ( imageInfo->depth != 0 ) ? depthKernelLine : "",
+            ( imageInfo->depth != 0 ) ? 4 : 2,
+            ( imageInfo->depth != 0 ) ? depthDimKernelLine : "",
             channelTypeConstantString, channelOrderConstantString);
     //log_info("-----------------------------------\n%s\n", programSrc);
@@ -132,13 +121,11 @@
     if (error)
         print_error(error, "clFinish failed.\n");
     const char *ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create kernel to test against" );
     // Create an output buffer
-    outDataBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                   sizeof(outKernelData), NULL, &error);
+    outDataBuffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outKernelData ), NULL, &error );
     test_error( error, "Unable to create output buffer" );
     // Set up arguments and run
@@ -209,9 +196,7 @@
     return error;
-int test_get_image_info_2D(cl_device_id device, cl_context context,
-                           cl_command_queue queue, cl_image_format *format,
-                           cl_mem_flags flags)
+int test_get_image_info_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
     size_t maxWidth, maxHeight;
     cl_ulong maxAllocSize, memSize;
@@ -244,8 +229,7 @@
                 if( gDebugTrace )
                     log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
-                int ret = test_get_image_info_single(context, queue, &imageInfo,
-                                                     seed, flags);
+                int ret = test_get_image_info_single( context, queue, &imageInfo, seed );
                 if( ret )
                     return -1;
@@ -268,8 +252,7 @@
             log_info( "Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]);
             if( gDebugTrace )
                 log_info( "   at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] );
-            if (test_get_image_info_single(context, queue, &imageInfo, seed,
-                                           flags))
+            if( test_get_image_info_single( context, queue, &imageInfo, seed ) )
                 return -1;
@@ -299,8 +282,7 @@
             if( gDebugTrace )
                 log_info( "   at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
-            int ret = test_get_image_info_single(context, queue, &imageInfo,
-                                                 seed, flags);
+            int ret = test_get_image_info_single( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
diff --git a/test_conformance/images/kernel_image_methods/test_2D_array.cpp b/test_conformance/images/kernel_image_methods/test_2D_array.cpp
index 21a6b04..85b8a7a 100644
--- a/test_conformance/images/kernel_image_methods/test_2D_array.cpp
+++ b/test_conformance/images/kernel_image_methods/test_2D_array.cpp
@@ -15,6 +15,10 @@
 #include "../testBase.h"
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages, gDeviceLt20;
 struct image_kernel_data
@@ -28,32 +32,28 @@
 static const char *methodTestKernelPattern =
-    "typedef struct {\n"
-    "    int width;\n"
-    "    int height;\n"
-    "    int arraySize;\n"
-    "    int channelType;\n"
-    "    int channelOrder;\n"
-    "    int expectedChannelType;\n"
-    "    int expectedChannelOrder;\n"
-    " } image_kernel_data;\n"
-    "__kernel void sample_kernel( %s %s input, __global image_kernel_data "
-    "*outData )\n"
-    "{\n"
-    "   outData->width = get_image_width( input );\n"
-    "   outData->height = get_image_height( input );\n"
-    "   outData->arraySize = get_image_array_size( input );\n"
-    "   outData->channelType = get_image_channel_data_type( input );\n"
-    "   outData->channelOrder = get_image_channel_order( input );\n"
-    "\n"
-    "   outData->expectedChannelType = %s;\n"
-    "   outData->expectedChannelOrder = %s;\n"
-    "}";
+"typedef struct {\n"
+"    int width;\n"
+"    int height;\n"
+"    int arraySize;\n"
+"    int channelType;\n"
+"    int channelOrder;\n"
+"    int expectedChannelType;\n"
+"    int expectedChannelOrder;\n"
+" } image_kernel_data;\n"
+"__kernel void sample_kernel( read_only %s input, __global image_kernel_data *outData )\n"
+"   outData->width = get_image_width( input );\n"
+"   outData->height = get_image_height( input );\n"
+"   outData->arraySize = get_image_array_size( input );\n"
+"   outData->channelType = get_image_channel_data_type( input );\n"
+"   outData->channelOrder = get_image_channel_order( input );\n"
+"   outData->expectedChannelType = %s;\n"
+"   outData->expectedChannelOrder = %s;\n"
-int test_get_2Dimage_array_info_single(cl_context context,
-                                       cl_command_queue queue,
-                                       image_descriptor *imageInfo, MTdata d,
-                                       cl_mem_flags flags)
+int test_get_2Dimage_array_info_single( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d )
     int error = 0;
@@ -72,9 +72,7 @@
     if( gDebugTrace )
         log_info( " - Creating 2D image array %d by %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize );
-    image = create_image_2d_array(context, flags, imageInfo->format,
-                                  imageInfo->width, imageInfo->height,
-                                  imageInfo->arraySize, 0, 0, NULL, &error);
+    image = create_image_2d_array( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0, NULL, &error );
     if( image == NULL )
         log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, IGetErrorString( error ) );
@@ -86,8 +84,6 @@
     const char* channelTypeName = GetChannelTypeName( imageInfo->format->image_channel_data_type );
     const char* channelOrderName = GetChannelOrderName( imageInfo->format->image_channel_order );
-    const char *image_access_qualifier =
-        (flags == CL_MEM_READ_ONLY) ? "read_only" : "write_only";
     if(channelTypeName && strlen(channelTypeName))
         sprintf(channelTypeConstantString, "CLK_%s", &channelTypeName[3]);  // replace CL_* with CLK_*
@@ -96,10 +92,8 @@
         sprintf(channelOrderConstantString, "CLK_%s", &channelOrderName[3]); // replace CL_* with CLK_*
     // Create a program to run against
-    sprintf(programSrc, methodTestKernelPattern, image_access_qualifier,
-            (imageInfo->format->image_channel_order == CL_DEPTH)
-                ? "image2d_array_depth_t"
-                : "image2d_array_t",
+    sprintf( programSrc, methodTestKernelPattern,
+            (imageInfo->format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t" : "image2d_array_t" ,
             channelTypeConstantString, channelOrderConstantString);
     //log_info("-----------------------------------\n%s\n", programSrc);
@@ -107,13 +101,11 @@
     if (error)
         print_error(error, "clFinish failed.\n");
     const char *ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create kernel to test against" );
     // Create an output buffer
-    outDataBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                   sizeof(outKernelData), NULL, &error);
+    outDataBuffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outKernelData ), NULL, &error );
     test_error( error, "Unable to create output buffer" );
     // Set up arguments and run
@@ -169,9 +161,7 @@
     return error;
-int test_get_image_info_2D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 cl_image_format *format, cl_mem_flags flags)
+int test_get_image_info_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
     size_t maxWidth, maxHeight, maxArraySize;
     cl_ulong maxAllocSize, memSize;
@@ -207,8 +197,7 @@
                     if( gDebugTrace )
                         log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize );
-                    int ret = test_get_2Dimage_array_info_single(
-                        context, queue, &imageInfo, seed, flags);
+                    int ret = test_get_2Dimage_array_info_single( context, queue, &imageInfo, seed );
                     if( ret )
                         return -1;
@@ -234,8 +223,7 @@
             log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
             if( gDebugTrace )
                 log_info( "   at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
-            if (test_get_2Dimage_array_info_single(context, queue, &imageInfo,
-                                                   seed, flags))
+            if( test_get_2Dimage_array_info_single( context, queue, &imageInfo, seed ) )
                 return -1;
@@ -244,9 +232,6 @@
         for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
             cl_ulong size;
-            cl_ulong slicePitch;
-            cl_ulong rowPitch;
             // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
             // image, the result array, plus offset arrays, will fit in the global ram space
@@ -255,30 +240,26 @@
                 imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed );
                 imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed );
-                rowPitch = imageInfo.width * pixelSize;
-                slicePitch = rowPitch * imageInfo.height;
+                imageInfo.rowPitch = imageInfo.width * pixelSize;
+                imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
                 size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
-                rowPitch += extraWidth;
+                imageInfo.rowPitch += extraWidth;
                 do {
-                    rowPitch += extraWidth;
-                } while ((rowPitch % pixelSize) != 0);
+                    imageInfo.rowPitch += extraWidth;
+                } while ((imageInfo.rowPitch % pixelSize) != 0);
                 size_t extraHeight = (int)random_log_in_range( 0, 8, seed );
-                slicePitch = rowPitch * (imageInfo.height + extraHeight);
+                imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight);
-                size = slicePitch * imageInfo.arraySize * 4 * 4;
+                size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4;
             } while(  size > maxAllocSize || ( size * 3 ) > memSize );
-            imageInfo.slicePitch = slicePitch;
-            imageInfo.rowPitch = rowPitch;
             if( gDebugTrace )
                 log_info( "   at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxArraySize );
-            int ret = test_get_2Dimage_array_info_single(
-                context, queue, &imageInfo, seed, flags);
+            int ret = test_get_2Dimage_array_info_single( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
diff --git a/test_conformance/images/kernel_image_methods/test_3D.cpp b/test_conformance/images/kernel_image_methods/test_3D.cpp
index aae433b..390bf8f 100644
--- a/test_conformance/images/kernel_image_methods/test_3D.cpp
+++ b/test_conformance/images/kernel_image_methods/test_3D.cpp
@@ -15,14 +15,14 @@
 #include "../testBase.h"
-extern int test_get_image_info_single(cl_context context,
-                                      cl_command_queue queue,
-                                      image_descriptor *imageInfo, MTdata d,
-                                      cl_mem_flags flags);
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
-int test_get_image_info_3D(cl_device_id device, cl_context context,
-                           cl_command_queue queue, cl_image_format *format,
-                           cl_mem_flags flags)
+extern bool            gDebugTrace, gTestSmallImages, gTestMaxImages, gDeviceLt20;
+extern int test_get_image_info_single( cl_context context, cl_command_queue queue, image_descriptor *imageInfo, MTdata d );
+int test_get_image_info_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format )
     size_t maxWidth, maxHeight, maxDepth;
     cl_ulong maxAllocSize, memSize;
@@ -30,16 +30,6 @@
     RandomSeed seed( gRandomSeed );
     size_t pixelSize;
-    if ((flags != CL_MEM_READ_ONLY)
-        && !is_extension_available(device, "cl_khr_3d_image_writes"))
-    {
-        log_info("-----------------------------------------------------\n");
-        log_info("This device does not support cl_khr_3d_image_writes.\n"
-                 "Skipping 3d image write test.\n");
-        log_info("-----------------------------------------------------\n\n");
-        return 0;
-    }
     imageInfo.type = CL_MEM_OBJECT_IMAGE3D;
     imageInfo.format = format;
     pixelSize = get_pixel_size( imageInfo.format );
@@ -68,8 +58,7 @@
                     if( gDebugTrace )
                         log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth );
-                    int ret = test_get_image_info_single(
-                        context, queue, &imageInfo, seed, flags);
+                    int ret = test_get_image_info_single( context, queue, &imageInfo, seed );
                     if( ret )
                         return -1;
@@ -95,8 +84,7 @@
             log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
             if( gDebugTrace )
                 log_info( "   at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
-            if (test_get_image_info_single(context, queue, &imageInfo, seed,
-                                           flags))
+            if( test_get_image_info_single( context, queue, &imageInfo, seed ) )
                 return -1;
@@ -105,9 +93,6 @@
         for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
             cl_ulong size;
-            cl_ulong slicePitch;
-            cl_ulong rowPitch;
             // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
             // image, the result array, plus offset arrays, will fit in the global ram space
@@ -116,30 +101,26 @@
                 imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed );
                 imageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32, seed );
-                rowPitch = imageInfo.width * pixelSize;
-                slicePitch = imageInfo.rowPitch * imageInfo.height;
+                imageInfo.rowPitch = imageInfo.width * pixelSize;
+                imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
                 size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
-                rowPitch += extraWidth;
+                imageInfo.rowPitch += extraWidth;
                 do {
-                    rowPitch += extraWidth;
-                } while ((rowPitch % pixelSize) != 0);
+                    imageInfo.rowPitch += extraWidth;
+                } while ((imageInfo.rowPitch % pixelSize) != 0);
                 size_t extraHeight = (int)random_log_in_range( 0, 8, seed );
-                slicePitch = rowPitch * (imageInfo.height + extraHeight);
+                imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight);
-                size = slicePitch * imageInfo.depth * 4 * 4;
+                size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.depth * 4 * 4;
             } while(  size > maxAllocSize || ( size * 3 ) > memSize );
-            imageInfo.slicePitch = slicePitch;
-            imageInfo.rowPitch = rowPitch;
             if( gDebugTrace )
                 log_info( "   at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxDepth );
-            int ret = test_get_image_info_single(context, queue, &imageInfo,
-                                                 seed, flags);
+            int ret = test_get_image_info_single( context, queue, &imageInfo, seed );
             if( ret )
                 return -1;
diff --git a/test_conformance/images/kernel_image_methods/test_loops.cpp b/test_conformance/images/kernel_image_methods/test_loops.cpp
index 1d892a9..3b56d3e 100644
--- a/test_conformance/images/kernel_image_methods/test_loops.cpp
+++ b/test_conformance/images/kernel_image_methods/test_loops.cpp
@@ -16,24 +16,20 @@
 #include "../testBase.h"
 #include "../common.h"
+extern cl_filter_mode     gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern int                gTypesToTest;
+extern int                gNormalizedModeToUse;
+extern cl_channel_type      gChannelTypeToUse;
+extern bool gDeviceLt20;
-extern int test_get_image_info_1D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  cl_image_format *format, cl_mem_flags flags);
-extern int test_get_image_info_2D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  cl_image_format *format, cl_mem_flags flags);
-extern int test_get_image_info_3D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  cl_image_format *format, cl_mem_flags flags);
-extern int test_get_image_info_1D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        cl_image_format *format,
-                                        cl_mem_flags flags);
-extern int test_get_image_info_2D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        cl_image_format *format,
-                                        cl_mem_flags flags);
+extern bool gDebugTrace;
+extern int test_get_image_info_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_get_image_info_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_get_image_info_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_get_image_info_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
+extern int test_get_image_info_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format );
 int test_image_type( cl_device_id device, cl_context context, cl_command_queue queue, cl_mem_object_type imageType, cl_mem_flags flags )
@@ -42,14 +38,24 @@
     int ret = 0;
     // Grab the list of supported image formats for integer reads
-    std::vector<cl_image_format> formatList;
-    if (get_format_list(context, imageType, formatList, flags)) return -1;
+    cl_image_format *formatList;
+    bool *filterFlags;
+    unsigned int numFormats;
-    std::vector<bool> filterFlags(formatList.size(), false);
-    filter_formats(formatList, filterFlags, nullptr);
+    if( get_format_list( context, imageType, formatList, numFormats, flags ) )
+        return -1;
+    filterFlags = new bool[ numFormats ];
+    if( filterFlags == NULL )
+    {
+        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
+        return -1;
+    }
+    memset( filterFlags, 0, sizeof( bool ) * numFormats );
+    filter_formats( formatList, filterFlags, numFormats, 0 );
     // Run the format list
-    for (unsigned int i = 0; i < formatList.size(); i++)
+    for( unsigned int i = 0; i < numFormats; i++ )
         int test_return = 0;
         if( filterFlags[i] )
@@ -65,24 +71,19 @@
         switch (imageType) {
             case CL_MEM_OBJECT_IMAGE1D:
-                test_return = test_get_image_info_1D(device, context, queue,
-                                                     &formatList[i], flags);
+                test_return = test_get_image_info_1D( device, context, queue, &formatList[ i ] );
             case CL_MEM_OBJECT_IMAGE2D:
-                test_return = test_get_image_info_2D(device, context, queue,
-                                                     &formatList[i], flags);
+                test_return = test_get_image_info_2D( device, context, queue, &formatList[ i ] );
             case CL_MEM_OBJECT_IMAGE3D:
-                test_return = test_get_image_info_3D(device, context, queue,
-                                                     &formatList[i], flags);
+                test_return = test_get_image_info_3D( device, context, queue, &formatList[ i ] );
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                test_return = test_get_image_info_1D_array(
-                    device, context, queue, &formatList[i], flags);
+                test_return = test_get_image_info_1D_array( device, context, queue, &formatList[ i ] );
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                test_return = test_get_image_info_2D_array(
-                    device, context, queue, &formatList[i], flags);
+                test_return = test_get_image_info_2D_array( device, context, queue, &formatList[ i ] );
@@ -96,6 +97,9 @@
         ret += test_return;
+    delete filterFlags;
+    delete formatList;
     return ret;
@@ -103,6 +107,9 @@
     int version_check;
     auto version = get_device_cl_version(device);
+    if (version < Version(2, 0)) {
+        gDeviceLt20 = true;
+    }
     if ((version_check = (version < Version(1, 2))))
diff --git a/test_conformance/images/kernel_read_write/main.cpp b/test_conformance/images/kernel_read_write/main.cpp
index 31dceb3..51d5c07 100644
--- a/test_conformance/images/kernel_read_write/main.cpp
+++ b/test_conformance/images/kernel_read_write/main.cpp
@@ -13,14 +13,22 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "../harness/compat.h"
 #include <stdio.h>
 #include <string.h>
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <sys/time.h>
 #include "../testBase.h"
-#include "../harness/compat.h"
 #include "../harness/fpcontrol.h"
 #include "../harness/parseParameters.h"
+#include <vector>
 #if defined(__PPC__)
 // Global varaiable used to hold the FPU control register state. The FPSCR register can not
 // be used because not all Power implementations retain or observed the NI (non-IEEE
@@ -35,6 +43,7 @@
 bool gTestMaxImages;
 bool gTestImage2DFromBuffer;
 bool gTestMipmaps;
+bool gDeviceLt20 = false;
 cl_filter_mode    gFilterModeToUse = (cl_filter_mode)-1;
 // Default is CL_MEM_USE_HOST_PTR for the test
 cl_mem_flags    gMemFlagsToUse = CL_MEM_USE_HOST_PTR;
@@ -49,6 +58,8 @@
 int             gtestTypesToRun = 0;
 static int testTypesToRun;
 static void printUsage( const char *execName );
 extern int test_image_set( cl_device_id device, cl_context context, cl_command_queue queue, test_format_set_fn formatTestFn, cl_mem_object_type imageType );
@@ -106,6 +117,10 @@
     bool            tDisableOffsets = false;
     bool            tNormalizedModeToUse = false;
     cl_filter_mode  tFilterModeToUse = (cl_filter_mode)-1;
+    auto version = get_device_cl_version(device);
+    if (version < Version(2, 0)) {
+        gDeviceLt20 = true;
+    }
     if( testTypesToRun & kReadTests )
@@ -162,10 +177,11 @@
-    if ((testTypesToRun & kReadWriteTests)
-        && checkForReadWriteImageSupport(device))
-    {
-        return TEST_SKIPPED_ITSELF;
+    if (testTypesToRun & kReadWriteTests) {
+        if (gDeviceLt20)  {
+            log_info("TEST skipped, Opencl 2.0 + requried for this test");
+            return ret;
+        }
     if( ( testTypesToRun & kReadWriteTests ) && !gTestMipmaps )
@@ -390,8 +406,7 @@
     FPU_mode_type oldMode;
-    int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
-                                      false, 0, verifyImageSupport);
+    int ret = runTestHarness( argCount, argList, test_num, test_list, true, false, 0 );
     // Restore FP state before leaving
diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp
index e76710b..5182601 100644
--- a/test_conformance/images/kernel_read_write/test_common.cpp
+++ b/test_conformance/images/kernel_read_write/test_common.cpp
@@ -1,22 +1,6 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
 #include "test_common.h"
 cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool test_mipmaps, cl_int *error) {
     cl_sampler sampler = nullptr;
     if (test_mipmaps) {
@@ -33,1514 +17,3 @@
     return sampler;
-void InitFloatCoordsCommon(image_descriptor *imageInfo,
-                           image_sampler_data *imageSampler, float *xOffsets,
-                           float *yOffsets, float *zOffsets, float xfract,
-                           float yfract, float zfract, int normalized_coords,
-                           MTdata d, int lod)
-    size_t i = 0;
-    if (gDisableOffsets)
-    {
-        for (size_t z = 0; z < imageInfo->depth; z++)
-        {
-            for (size_t y = 0; y < imageInfo->height; y++)
-            {
-                for (size_t x = 0; x < imageInfo->width; x++, i++)
-                {
-                    xOffsets[i] = (float)(xfract + (double)x);
-                    yOffsets[i] = (float)(yfract + (double)y);
-                    zOffsets[i] = (float)(zfract + (double)z);
-                }
-            }
-        }
-    }
-    else
-    {
-        for (size_t z = 0; z < imageInfo->depth; z++)
-        {
-            for (size_t y = 0; y < imageInfo->height; y++)
-            {
-                for (size_t x = 0; x < imageInfo->width; x++, i++)
-                {
-                    xOffsets[i] =
-                        (float)(xfract
-                                + (double)((int)x
-                                           + random_in_range(-10, 10, d)));
-                    yOffsets[i] =
-                        (float)(yfract
-                                + (double)((int)y
-                                           + random_in_range(-10, 10, d)));
-                    zOffsets[i] =
-                        (float)(zfract
-                                + (double)((int)z
-                                           + random_in_range(-10, 10, d)));
-                }
-            }
-        }
-    }
-    if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
-    {
-        i = 0;
-        for (size_t z = 0; z < imageInfo->depth; z++)
-        {
-            for (size_t y = 0; y < imageInfo->height; y++)
-            {
-                for (size_t x = 0; x < imageInfo->width; x++, i++)
-                {
-                    xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0,
-                                               (double)imageInfo->width - 1.0);
-                    yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0,
-                                               (double)imageInfo->height - 1.0);
-                    zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0,
-                                               (double)imageInfo->depth - 1.0);
-                }
-            }
-        }
-    }
-    if (normalized_coords || gTestMipmaps)
-    {
-        i = 0;
-        if (lod == 0)
-        {
-            for (size_t z = 0; z < imageInfo->depth; z++)
-            {
-                for (size_t y = 0; y < imageInfo->height; y++)
-                {
-                    for (size_t x = 0; x < imageInfo->width; x++, i++)
-                    {
-                        xOffsets[i] = (float)((double)xOffsets[i]
-                                              / (double)imageInfo->width);
-                        yOffsets[i] = (float)((double)yOffsets[i]
-                                              / (double)imageInfo->height);
-                        zOffsets[i] = (float)((double)zOffsets[i]
-                                              / (double)imageInfo->depth);
-                    }
-                }
-            }
-        }
-        else if (gTestMipmaps)
-        {
-            size_t width_lod, height_lod, depth_lod;
-            width_lod =
-                (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-            height_lod =
-                (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
-            depth_lod =
-                (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
-            for (size_t z = 0; z < depth_lod; z++)
-            {
-                for (size_t y = 0; y < height_lod; y++)
-                {
-                    for (size_t x = 0; x < width_lod; x++, i++)
-                    {
-                        xOffsets[i] =
-                            (float)((double)xOffsets[i] / (double)width_lod);
-                        yOffsets[i] =
-                            (float)((double)yOffsets[i] / (double)height_lod);
-                        zOffsets[i] =
-                            (float)((double)zOffsets[i] / (double)depth_lod);
-                    }
-                }
-            }
-        }
-    }
-int test_read_image(cl_context context, cl_command_queue queue,
-                    cl_kernel kernel, image_descriptor *imageInfo,
-                    image_sampler_data *imageSampler, bool useFloatCoords,
-                    ExplicitType outputType, MTdata d)
-    int error;
-    size_t threads[3];
-    static int initHalf = 0;
-    cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY;
-    clMemWrapper xOffsets, yOffsets, zOffsets, results;
-    clSamplerWrapper actualSampler;
-    BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
-    // Create offset data
-    BufferOwningPtr<cl_float> xOffsetValues(
-        malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
-               * imageInfo->depth));
-    BufferOwningPtr<cl_float> yOffsetValues(
-        malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
-               * imageInfo->depth));
-    BufferOwningPtr<cl_float> zOffsetValues(
-        malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
-               * imageInfo->depth));
-    if (imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
-        if (DetectFloatToHalfRoundingMode(queue)) return 1;
-    BufferOwningPtr<char> imageValues;
-    generate_random_image_data(imageInfo, imageValues, d);
-    // Construct testing sources
-    clProtectedImage protImage;
-    clMemWrapper unprotImage;
-    cl_mem image;
-    if (gtestTypesToRun & kReadTests)
-    {
-        image_read_write_flags = CL_MEM_READ_ONLY;
-    }
-    else
-    {
-        image_read_write_flags = CL_MEM_READ_WRITE;
-    }
-    if (gMemFlagsToUse == CL_MEM_USE_HOST_PTR)
-    {
-        // clProtectedImage uses USE_HOST_PTR, so just rely on that for the
-        // testing (via Ian) Do not use protected images for max image size test
-        // since it rounds the row size to a page size
-        if (gTestMaxImages)
-        {
-            generate_random_image_data(imageInfo,
-                                       maxImageUseHostPtrBackingStore, d);
-            unprotImage = create_image_3d(
-                context, image_read_write_flags | CL_MEM_USE_HOST_PTR,
-                imageInfo->format, imageInfo->width, imageInfo->height,
-                imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
-                (gEnablePitch ? imageInfo->slicePitch : 0),
-                maxImageUseHostPtrBackingStore, &error);
-        }
-        else
-        {
-            error = protImage.Create(context, image_read_write_flags,
-                                     imageInfo->format, imageInfo->width,
-                                     imageInfo->height, imageInfo->depth);
-        }
-        if (error != CL_SUCCESS)
-        {
-            log_error("ERROR: Unable to create 3D image of size %d x %d x %d "
-                      "(pitch %d, %d ) (%s)",
-                      (int)imageInfo->width, (int)imageInfo->height,
-                      (int)imageInfo->depth, (int)imageInfo->rowPitch,
-                      (int)imageInfo->slicePitch, IGetErrorString(error));
-            return error;
-        }
-        if (gTestMaxImages)
-            image = (cl_mem)unprotImage;
-        else
-            image = (cl_mem)protImage;
-    }
-    else if (gMemFlagsToUse == CL_MEM_COPY_HOST_PTR)
-    {
-        // Don't use clEnqueueWriteImage; just use copy host ptr to get the data
-        // in
-        unprotImage = create_image_3d(
-            context, image_read_write_flags | CL_MEM_COPY_HOST_PTR,
-            imageInfo->format, imageInfo->width, imageInfo->height,
-            imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
-            (gEnablePitch ? imageInfo->slicePitch : 0), imageValues, &error);
-        if (error != CL_SUCCESS)
-        {
-            log_error("ERROR: Unable to create 3D image of size %d x %d x %d "
-                      "(pitch %d, %d ) (%s)",
-                      (int)imageInfo->width, (int)imageInfo->height,
-                      (int)imageInfo->depth, (int)imageInfo->rowPitch,
-                      (int)imageInfo->slicePitch, IGetErrorString(error));
-            return error;
-        }
-        image = unprotImage;
-    }
-    else // Either CL_MEM_ALLOC_HOST_PTR or none
-    {
-        // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can
-        // be accessed by the host, but otherwise it works just as if no flag is
-        // specified, so we just do the same thing either way
-        if (!gTestMipmaps)
-        {
-            unprotImage = create_image_3d(
-                context, image_read_write_flags | gMemFlagsToUse,
-                imageInfo->format, imageInfo->width, imageInfo->height,
-                imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
-                (gEnablePitch ? imageInfo->slicePitch : 0), imageValues,
-                &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error("ERROR: Unable to create 3D image of size %d x %d x "
-                          "%d (pitch %d, %d ) (%s)",
-                          (int)imageInfo->width, (int)imageInfo->height,
-                          (int)imageInfo->depth, (int)imageInfo->rowPitch,
-                          (int)imageInfo->slicePitch, IGetErrorString(error));
-                return error;
-            }
-            image = unprotImage;
-        }
-        else
-        {
-            cl_image_desc image_desc = { 0 };
-            image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
-            image_desc.image_width = imageInfo->width;
-            image_desc.image_height = imageInfo->height;
-            image_desc.image_depth = imageInfo->depth;
-            image_desc.num_mip_levels = imageInfo->num_mip_levels;
-            unprotImage =
-                clCreateImage(context, image_read_write_flags,
-                              imageInfo->format, &image_desc, NULL, &error);
-            if (error != CL_SUCCESS)
-            {
-                log_error("ERROR: Unable to create %d level mipmapped 3D image "
-                          "of size %d x %d x %d (pitch %d, %d ) (%s)",
-                          (int)imageInfo->num_mip_levels, (int)imageInfo->width,
-                          (int)imageInfo->height, (int)imageInfo->depth,
-                          (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
-                          IGetErrorString(error));
-                return error;
-            }
-            image = unprotImage;
-        }
-    }
-    if (gMemFlagsToUse != CL_MEM_COPY_HOST_PTR)
-    {
-        size_t origin[4] = { 0, 0, 0, 0 };
-        size_t region[3] = { imageInfo->width, imageInfo->height,
-                             imageInfo->depth };
-        if (gDebugTrace) log_info(" - Writing image...\n");
-        if (!gTestMipmaps)
-        {
-            error =
-                clEnqueueWriteImage(queue, image, CL_TRUE, origin, region,
-                                    gEnablePitch ? imageInfo->rowPitch : 0,
-                                    gEnablePitch ? imageInfo->slicePitch : 0,
-                                    imageValues, 0, NULL, NULL);
-            if (error != CL_SUCCESS)
-            {
-                log_error("ERROR: Unable to write to 3D image of size %d x %d "
-                          "x %d \n",
-                          (int)imageInfo->width, (int)imageInfo->height,
-                          (int)imageInfo->depth);
-                return error;
-            }
-        }
-        else
-        {
-            int nextLevelOffset = 0;
-            for (int i = 0; i < imageInfo->num_mip_levels; i++)
-            {
-                origin[3] = i;
-                error = clEnqueueWriteImage(
-                    queue, image, CL_TRUE, origin, region,
-                    /*gEnablePitch ? imageInfo->rowPitch :*/ 0,
-                    /*gEnablePitch ? imageInfo->slicePitch :*/ 0,
-                    ((char *)imageValues + nextLevelOffset), 0, NULL, NULL);
-                if (error != CL_SUCCESS)
-                {
-                    log_error("ERROR: Unable to write to %d level mipmapped 3D "
-                              "image of size %d x %d x %d\n",
-                              (int)imageInfo->num_mip_levels,
-                              (int)imageInfo->width, (int)imageInfo->height,
-                              (int)imageInfo->depth);
-                    return error;
-                }
-                nextLevelOffset += region[0] * region[1] * region[2]
-                    * get_pixel_size(imageInfo->format);
-                // Subsequent mip level dimensions keep halving
-                region[0] = region[0] >> 1 ? region[0] >> 1 : 1;
-                region[1] = region[1] >> 1 ? region[1] >> 1 : 1;
-                region[2] = region[2] >> 1 ? region[2] >> 1 : 1;
-            }
-        }
-    }
-    xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->height * imageInfo->depth,
-                              xOffsetValues, &error);
-    test_error(error, "Unable to create x offset buffer");
-    yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->height * imageInfo->depth,
-                              yOffsetValues, &error);
-    test_error(error, "Unable to create y offset buffer");
-    zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->height * imageInfo->depth,
-                              zOffsetValues, &error);
-    test_error(error, "Unable to create y offset buffer");
-    results =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       get_explicit_type_size(outputType) * 4 * imageInfo->width
-                           * imageInfo->height * imageInfo->depth,
-                       NULL, &error);
-    test_error(error, "Unable to create result buffer");
-    // Create sampler to use
-    actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
-    test_error(error, "Unable to create image sampler");
-    // Set arguments
-    int idx = 0;
-    error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &image);
-    test_error(error, "Unable to set kernel arguments");
-    if (!gUseKernelSamplers)
-    {
-        error =
-            clSetKernelArg(kernel, idx++, sizeof(cl_sampler), &actualSampler);
-        test_error(error, "Unable to set kernel arguments");
-    }
-    error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &xOffsets);
-    test_error(error, "Unable to set kernel arguments");
-    error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &yOffsets);
-    test_error(error, "Unable to set kernel arguments");
-    error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &zOffsets);
-    test_error(error, "Unable to set kernel arguments");
-    error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &results);
-    test_error(error, "Unable to set kernel arguments");
-    const float float_offsets[] = { 0.0f,
-                                    MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30),
-                                    0.25f,
-                                    0.3f,
-                                    0.5f - FLT_EPSILON / 4.0f,
-                                    0.5f,
-                                    0.9f,
-                                    1.0f - FLT_EPSILON / 2 };
-    int float_offset_count = sizeof(float_offsets) / sizeof(float_offsets[0]);
-    int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
-    int loopCount = 2 * float_offset_count;
-    if (!useFloatCoords) loopCount = 1;
-    if (gTestMaxImages)
-    {
-        loopCount = 1;
-        log_info("Testing each size only once with pixel offsets of %g for max "
-                 "sized images.\n",
-                 float_offsets[0]);
-    }
-    // Get the maximum absolute error for this format
-    double formatAbsoluteError =
-        get_max_absolute_error(imageInfo->format, imageSampler);
-    if (gDebugTrace)
-        log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
-    if (0 == initHalf
-        && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
-    {
-        initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode(queue);
-        if (initHalf)
-        {
-            log_info("Half rounding mode successfully detected.\n");
-        }
-    }
-    int nextLevelOffset = 0;
-    size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
-           depth_lod = imageInfo->depth;
-    // Loop over all mipmap levels, if we are testing mipmapped images.
-    for (int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels)
-         || (!gTestMipmaps && lod < 1);
-         lod++)
-    {
-        size_t resultValuesSize = width_lod * height_lod * depth_lod
-            * get_explicit_type_size(outputType) * 4;
-        BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
-        float lod_float = (float)lod;
-        if (gTestMipmaps)
-        {
-            // Set the lod kernel arg
-            if (gDebugTrace) log_info(" - Working at mip level %d\n", lod);
-            error = clSetKernelArg(kernel, idx, sizeof(float), &lod_float);
-            test_error(error, "Unable to set kernel arguments");
-        }
-        for (int q = 0; q < loopCount; q++)
-        {
-            float offset = float_offsets[q % float_offset_count];
-            // Init the coordinates
-            InitFloatCoordsCommon(imageInfo, imageSampler, xOffsetValues,
-                                  yOffsetValues, zOffsetValues,
-                                  q >= float_offset_count ? -offset : offset,
-                                  q >= float_offset_count ? offset : -offset,
-                                  q >= float_offset_count ? -offset : offset,
-                                  imageSampler->normalized_coords, d, lod);
-            error =
-                clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0,
-                                     sizeof(cl_float) * imageInfo->height
-                                         * imageInfo->width * imageInfo->depth,
-                                     xOffsetValues, 0, NULL, NULL);
-            test_error(error, "Unable to write x offsets");
-            error =
-                clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0,
-                                     sizeof(cl_float) * imageInfo->height
-                                         * imageInfo->width * imageInfo->depth,
-                                     yOffsetValues, 0, NULL, NULL);
-            test_error(error, "Unable to write y offsets");
-            error =
-                clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0,
-                                     sizeof(cl_float) * imageInfo->height
-                                         * imageInfo->width * imageInfo->depth,
-                                     zOffsetValues, 0, NULL, NULL);
-            test_error(error, "Unable to write z offsets");
-            memset(resultValues, 0xff, resultValuesSize);
-            clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, resultValuesSize,
-                                 resultValues, 0, NULL, NULL);
-            // Figure out thread dimensions
-            threads[0] = (size_t)width_lod;
-            threads[1] = (size_t)height_lod;
-            threads[2] = (size_t)depth_lod;
-            // Run the kernel
-            error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads,
-                                           NULL, 0, NULL, NULL);
-            test_error(error, "Unable to run kernel");
-            // Get results
-            error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0,
-                                        width_lod * height_lod * depth_lod
-                                            * get_explicit_type_size(outputType)
-                                            * 4,
-                                        resultValues, 0, NULL, NULL);
-            test_error(error, "Unable to read results from kernel");
-            if (gDebugTrace) log_info("    results read\n");
-            // Validate results element by element
-            char *imagePtr = (char *)imageValues + nextLevelOffset;
-            /*
-             * FLOAT output type
-             */
-            if (is_sRGBA_order(imageInfo->format->image_channel_order)
-                && (outputType == kFloat))
-            {
-                // Validate float results
-                float *resultPtr = (float *)(char *)resultValues;
-                float expected[4], error = 0.0f;
-                float maxErr = get_max_relative_error(
-                    imageInfo->format, imageSampler, 1 /*3D*/,
-                    CL_FILTER_LINEAR == imageSampler->filter_mode);
-                for (size_t z = 0, j = 0; z < depth_lod; z++)
-                {
-                    for (size_t y = 0; y < height_lod; y++)
-                    {
-                        for (size_t x = 0; x < width_lod; x++, j++)
-                        {
-                            // Step 1: go through and see if the results verify
-                            // for the pixel For the normalized case on a GPU we
-                            // put in offsets to the X, Y and Z to see if we
-                            // land on the right pixel. This addresses the
-                            // significant inaccuracy in GPU normalization in
-                            // OpenCL 1.0.
-                            int checkOnlyOnePixel = 0;
-                            int found_pixel = 0;
-                            float offset = NORM_OFFSET;
-                            if (!imageSampler->normalized_coords
-                                || imageSampler->filter_mode
-                                    != CL_FILTER_NEAREST
-                                || NORM_OFFSET == 0
-#if defined(__APPLE__)
-                                // Apple requires its CPU implementation to do
-                                // correctly rounded address arithmetic in all
-                                // modes
-                                || gDeviceType != CL_DEVICE_TYPE_GPU
-                            )
-                                offset = 0.0f; // Loop only once
-                            for (float norm_offset_x = -offset;
-                                 norm_offset_x <= offset && !found_pixel;
-                                 norm_offset_x += NORM_OFFSET)
-                            {
-                                for (float norm_offset_y = -offset;
-                                     norm_offset_y <= offset && !found_pixel;
-                                     norm_offset_y += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_z = -offset;
-                                         norm_offset_z <= NORM_OFFSET
-                                         && !found_pixel;
-                                         norm_offset_z += NORM_OFFSET)
-                                    {
-                                        int hasDenormals = 0;
-                                        FloatPixel maxPixel =
-                                            sample_image_pixel_float_offset(
-                                                imagePtr, imageInfo,
-                                                xOffsetValues[j],
-                                                yOffsetValues[j],
-                                                zOffsetValues[j], norm_offset_x,
-                                                norm_offset_y, norm_offset_z,
-                                                imageSampler, expected, 0,
-                                                &hasDenormals, lod);
-                                        float err1 =
-                                            ABS_ERROR(sRGBmap(resultPtr[0]),
-                                                      sRGBmap(expected[0]));
-                                        float err2 =
-                                            ABS_ERROR(sRGBmap(resultPtr[1]),
-                                                      sRGBmap(expected[1]));
-                                        float err3 =
-                                            ABS_ERROR(sRGBmap(resultPtr[2]),
-                                                      sRGBmap(expected[2]));
-                                        float err4 = ABS_ERROR(resultPtr[3],
-                                                               expected[3]);
-                                        // Clamp to the minimum absolute error
-                                        // for the format
-                                        if (err1 > 0
-                                            && err1 < formatAbsoluteError)
-                                        {
-                                            err1 = 0.0f;
-                                        }
-                                        if (err2 > 0
-                                            && err2 < formatAbsoluteError)
-                                        {
-                                            err2 = 0.0f;
-                                        }
-                                        if (err3 > 0
-                                            && err3 < formatAbsoluteError)
-                                        {
-                                            err3 = 0.0f;
-                                        }
-                                        if (err4 > 0
-                                            && err4 < formatAbsoluteError)
-                                        {
-                                            err4 = 0.0f;
-                                        }
-                                        float maxErr = 0.5;
-                                        if (!(err1 <= maxErr)
-                                            || !(err2 <= maxErr)
-                                            || !(err3 <= maxErr)
-                                            || !(err4 <= maxErr))
-                                        {
-                                            // Try flushing the denormals
-                                            if (hasDenormals)
-                                            {
-                                                // If implementation decide to
-                                                // flush subnormals to zero, max
-                                                // error needs to be adjusted
-                                                maxErr += 4 * FLT_MIN;
-                                                maxPixel =
-                                                    sample_image_pixel_float_offset(
-                                                        imagePtr, imageInfo,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z,
-                                                        imageSampler, expected,
-                                                        0, NULL, lod);
-                                                err1 = ABS_ERROR(
-                                                    sRGBmap(resultPtr[0]),
-                                                    sRGBmap(expected[0]));
-                                                err2 = ABS_ERROR(
-                                                    sRGBmap(resultPtr[1]),
-                                                    sRGBmap(expected[1]));
-                                                err3 = ABS_ERROR(
-                                                    sRGBmap(resultPtr[2]),
-                                                    sRGBmap(expected[2]));
-                                                err4 = ABS_ERROR(resultPtr[3],
-                                                                 expected[3]);
-                                            }
-                                        }
-                                        found_pixel = (err1 <= maxErr)
-                                            && (err2 <= maxErr)
-                                            && (err3 <= maxErr)
-                                            && (err4 <= maxErr);
-                                    } // norm_offset_z
-                                } // norm_offset_y
-                            } // norm_offset_x
-                            // Step 2: If we did not find a match, then print
-                            // out debugging info.
-                            if (!found_pixel)
-                            {
-                                // For the normalized case on a GPU we put in
-                                // offsets to the X and Y to see if we land on
-                                // the right pixel. This addresses the
-                                // significant inaccuracy in GPU normalization
-                                // in OpenCL 1.0.
-                                checkOnlyOnePixel = 0;
-                                int shouldReturn = 0;
-                                for (float norm_offset_x = -offset;
-                                     norm_offset_x <= offset
-                                     && !checkOnlyOnePixel;
-                                     norm_offset_x += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_y = -offset;
-                                         norm_offset_y <= offset
-                                         && !checkOnlyOnePixel;
-                                         norm_offset_y += NORM_OFFSET)
-                                    {
-                                        for (float norm_offset_z = -offset;
-                                             norm_offset_z <= offset
-                                             && !checkOnlyOnePixel;
-                                             norm_offset_z += NORM_OFFSET)
-                                        {
-                                            int hasDenormals = 0;
-                                            FloatPixel maxPixel =
-                                                sample_image_pixel_float_offset(
-                                                    imagePtr, imageInfo,
-                                                    xOffsetValues[j],
-                                                    yOffsetValues[j],
-                                                    zOffsetValues[j],
-                                                    norm_offset_x,
-                                                    norm_offset_y,
-                                                    norm_offset_z, imageSampler,
-                                                    expected, 0, &hasDenormals,
-                                                    lod);
-                                            float err1 =
-                                                ABS_ERROR(sRGBmap(resultPtr[0]),
-                                                          sRGBmap(expected[0]));
-                                            float err2 =
-                                                ABS_ERROR(sRGBmap(resultPtr[1]),
-                                                          sRGBmap(expected[1]));
-                                            float err3 =
-                                                ABS_ERROR(sRGBmap(resultPtr[2]),
-                                                          sRGBmap(expected[2]));
-                                            float err4 = ABS_ERROR(resultPtr[3],
-                                                                   expected[3]);
-                                            float maxErr = 0.6;
-                                            if (!(err1 <= maxErr)
-                                                || !(err2 <= maxErr)
-                                                || !(err3 <= maxErr)
-                                                || !(err4 <= maxErr))
-                                            {
-                                                // Try flushing the denormals
-                                                if (hasDenormals)
-                                                {
-                                                    // If implementation decide
-                                                    // to flush subnormals to
-                                                    // zero, max error needs to
-                                                    // be adjusted
-                                                    maxErr += 4 * FLT_MIN;
-                                                    maxPixel =
-                                                        sample_image_pixel_float(
-                                                            imagePtr, imageInfo,
-                                                            xOffsetValues[j],
-                                                            yOffsetValues[j],
-                                                            zOffsetValues[j],
-                                                            imageSampler,
-                                                            expected, 0, NULL,
-                                                            lod);
-                                                    err1 = ABS_ERROR(
-                                                        sRGBmap(resultPtr[0]),
-                                                        sRGBmap(expected[0]));
-                                                    err2 = ABS_ERROR(
-                                                        sRGBmap(resultPtr[1]),
-                                                        sRGBmap(expected[1]));
-                                                    err3 = ABS_ERROR(
-                                                        sRGBmap(resultPtr[2]),
-                                                        sRGBmap(expected[2]));
-                                                    err4 =
-                                                        ABS_ERROR(resultPtr[3],
-                                                                  expected[3]);
-                                                }
-                                            }
-                                            if (!(err1 <= maxErr)
-                                                || !(err2 <= maxErr)
-                                                || !(err3 <= maxErr)
-                                                || !(err4 <= maxErr))
-                                            {
-                                                log_error(
-                                                    "FAILED norm_offsets: %g , "
-                                                    "%g , %g:\n",
-                                                    norm_offset_x,
-                                                    norm_offset_y,
-                                                    norm_offset_z);
-                                                float tempOut[4];
-                                                shouldReturn |=
-                                                    determine_validation_error_offset<
-                                                        float>(
-                                                        imagePtr, imageInfo,
-                                                        imageSampler, resultPtr,
-                                                        expected, error,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z, j,
-                                                        numTries, numClamped,
-                                                        true, lod);
-                                                log_error("Step by step:\n");
-                                                FloatPixel temp =
-                                                    sample_image_pixel_float_offset(
-                                                        imagePtr, imageInfo,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z,
-                                                        imageSampler, tempOut,
-                                                        1 /*verbose*/,
-                                                        &hasDenormals, lod);
-                                                log_error(
-                                                    "\tulps: %2.2f, %2.2f, "
-                                                    "%2.2f, %2.2f  (max "
-                                                    "allowed: %2.2f)\n\n",
-                                                    Ulp_Error(resultPtr[0],
-                                                              expected[0]),
-                                                    Ulp_Error(resultPtr[1],
-                                                              expected[1]),
-                                                    Ulp_Error(resultPtr[2],
-                                                              expected[2]),
-                                                    Ulp_Error(resultPtr[3],
-                                                              expected[3]),
-                                                    Ulp_Error(
-                                                        MAKE_HEX_FLOAT(
-                                                            0x1.000002p0f,
-                                                            0x1000002L, -24)
-                                                            + maxErr,
-                                                        MAKE_HEX_FLOAT(
-                                                            0x1.000002p0f,
-                                                            0x1000002L, -24)));
-                                            }
-                                            else
-                                            {
-                                                log_error(
-                                                    "Test error: we should "
-                                                    "have detected this "
-                                                    "passing above.\n");
-                                            }
-                                        } // norm_offset_z
-                                    } // norm_offset_y
-                                } // norm_offset_x
-                                if (shouldReturn) return 1;
-                            } // if (!found_pixel)
-                            resultPtr += 4;
-                        }
-                    }
-                }
-            }
-            /*
-             * FLOAT output type
-             */
-            else if (outputType == kFloat)
-            {
-                // Validate float results
-                float *resultPtr = (float *)(char *)resultValues;
-                float expected[4], error = 0.0f;
-                float maxErr = get_max_relative_error(
-                    imageInfo->format, imageSampler, 1 /*3D*/,
-                    CL_FILTER_LINEAR == imageSampler->filter_mode);
-                for (size_t z = 0, j = 0; z < depth_lod; z++)
-                {
-                    for (size_t y = 0; y < height_lod; y++)
-                    {
-                        for (size_t x = 0; x < width_lod; x++, j++)
-                        {
-                            // Step 1: go through and see if the results verify
-                            // for the pixel For the normalized case on a GPU we
-                            // put in offsets to the X, Y and Z to see if we
-                            // land on the right pixel. This addresses the
-                            // significant inaccuracy in GPU normalization in
-                            // OpenCL 1.0.
-                            int checkOnlyOnePixel = 0;
-                            int found_pixel = 0;
-                            float offset = NORM_OFFSET;
-                            if (!imageSampler->normalized_coords
-                                || imageSampler->filter_mode
-                                    != CL_FILTER_NEAREST
-                                || NORM_OFFSET == 0
-#if defined(__APPLE__)
-                                // Apple requires its CPU implementation to do
-                                // correctly rounded address arithmetic in all
-                                // modes
-                                || gDeviceType != CL_DEVICE_TYPE_GPU
-                            )
-                                offset = 0.0f; // Loop only once
-                            for (float norm_offset_x = -offset;
-                                 norm_offset_x <= offset && !found_pixel;
-                                 norm_offset_x += NORM_OFFSET)
-                            {
-                                for (float norm_offset_y = -offset;
-                                     norm_offset_y <= offset && !found_pixel;
-                                     norm_offset_y += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_z = -offset;
-                                         norm_offset_z <= NORM_OFFSET
-                                         && !found_pixel;
-                                         norm_offset_z += NORM_OFFSET)
-                                    {
-                                        int hasDenormals = 0;
-                                        FloatPixel maxPixel =
-                                            sample_image_pixel_float_offset(
-                                                imagePtr, imageInfo,
-                                                xOffsetValues[j],
-                                                yOffsetValues[j],
-                                                zOffsetValues[j], norm_offset_x,
-                                                norm_offset_y, norm_offset_z,
-                                                imageSampler, expected, 0,
-                                                &hasDenormals, lod);
-                                        float err1 = ABS_ERROR(resultPtr[0],
-                                                               expected[0]);
-                                        float err2 = ABS_ERROR(resultPtr[1],
-                                                               expected[1]);
-                                        float err3 = ABS_ERROR(resultPtr[2],
-                                                               expected[2]);
-                                        float err4 = ABS_ERROR(resultPtr[3],
-                                                               expected[3]);
-                                        // Clamp to the minimum absolute error
-                                        // for the format
-                                        if (err1 > 0
-                                            && err1 < formatAbsoluteError)
-                                        {
-                                            err1 = 0.0f;
-                                        }
-                                        if (err2 > 0
-                                            && err2 < formatAbsoluteError)
-                                        {
-                                            err2 = 0.0f;
-                                        }
-                                        if (err3 > 0
-                                            && err3 < formatAbsoluteError)
-                                        {
-                                            err3 = 0.0f;
-                                        }
-                                        if (err4 > 0
-                                            && err4 < formatAbsoluteError)
-                                        {
-                                            err4 = 0.0f;
-                                        }
-                                        float maxErr1 = MAX(
-                                            maxErr * maxPixel.p[0], FLT_MIN);
-                                        float maxErr2 = MAX(
-                                            maxErr * maxPixel.p[1], FLT_MIN);
-                                        float maxErr3 = MAX(
-                                            maxErr * maxPixel.p[2], FLT_MIN);
-                                        float maxErr4 = MAX(
-                                            maxErr * maxPixel.p[3], FLT_MIN);
-                                        if (!(err1 <= maxErr1)
-                                            || !(err2 <= maxErr2)
-                                            || !(err3 <= maxErr3)
-                                            || !(err4 <= maxErr4))
-                                        {
-                                            // Try flushing the denormals
-                                            if (hasDenormals)
-                                            {
-                                                // If implementation decide to
-                                                // flush subnormals to zero, max
-                                                // error needs to be adjusted
-                                                maxErr1 += 4 * FLT_MIN;
-                                                maxErr2 += 4 * FLT_MIN;
-                                                maxErr3 += 4 * FLT_MIN;
-                                                maxErr4 += 4 * FLT_MIN;
-                                                maxPixel =
-                                                    sample_image_pixel_float_offset(
-                                                        imagePtr, imageInfo,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z,
-                                                        imageSampler, expected,
-                                                        0, NULL, lod);
-                                                err1 = ABS_ERROR(resultPtr[0],
-                                                                 expected[0]);
-                                                err2 = ABS_ERROR(resultPtr[1],
-                                                                 expected[1]);
-                                                err3 = ABS_ERROR(resultPtr[2],
-                                                                 expected[2]);
-                                                err4 = ABS_ERROR(resultPtr[3],
-                                                                 expected[3]);
-                                            }
-                                        }
-                                        found_pixel = (err1 <= maxErr1)
-                                            && (err2 <= maxErr2)
-                                            && (err3 <= maxErr3)
-                                            && (err4 <= maxErr4);
-                                    } // norm_offset_z
-                                } // norm_offset_y
-                            } // norm_offset_x
-                            // Step 2: If we did not find a match, then print
-                            // out debugging info.
-                            if (!found_pixel)
-                            {
-                                // For the normalized case on a GPU we put in
-                                // offsets to the X and Y to see if we land on
-                                // the right pixel. This addresses the
-                                // significant inaccuracy in GPU normalization
-                                // in OpenCL 1.0.
-                                checkOnlyOnePixel = 0;
-                                int shouldReturn = 0;
-                                for (float norm_offset_x = -offset;
-                                     norm_offset_x <= offset
-                                     && !checkOnlyOnePixel;
-                                     norm_offset_x += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_y = -offset;
-                                         norm_offset_y <= offset
-                                         && !checkOnlyOnePixel;
-                                         norm_offset_y += NORM_OFFSET)
-                                    {
-                                        for (float norm_offset_z = -offset;
-                                             norm_offset_z <= offset
-                                             && !checkOnlyOnePixel;
-                                             norm_offset_z += NORM_OFFSET)
-                                        {
-                                            int hasDenormals = 0;
-                                            FloatPixel maxPixel =
-                                                sample_image_pixel_float_offset(
-                                                    imagePtr, imageInfo,
-                                                    xOffsetValues[j],
-                                                    yOffsetValues[j],
-                                                    zOffsetValues[j],
-                                                    norm_offset_x,
-                                                    norm_offset_y,
-                                                    norm_offset_z, imageSampler,
-                                                    expected, 0, &hasDenormals,
-                                                    lod);
-                                            float err1 = ABS_ERROR(resultPtr[0],
-                                                                   expected[0]);
-                                            float err2 = ABS_ERROR(resultPtr[1],
-                                                                   expected[1]);
-                                            float err3 = ABS_ERROR(resultPtr[2],
-                                                                   expected[2]);
-                                            float err4 = ABS_ERROR(resultPtr[3],
-                                                                   expected[3]);
-                                            float maxErr1 =
-                                                MAX(maxErr * maxPixel.p[0],
-                                                    FLT_MIN);
-                                            float maxErr2 =
-                                                MAX(maxErr * maxPixel.p[1],
-                                                    FLT_MIN);
-                                            float maxErr3 =
-                                                MAX(maxErr * maxPixel.p[2],
-                                                    FLT_MIN);
-                                            float maxErr4 =
-                                                MAX(maxErr * maxPixel.p[3],
-                                                    FLT_MIN);
-                                            if (!(err1 <= maxErr1)
-                                                || !(err2 <= maxErr2)
-                                                || !(err3 <= maxErr3)
-                                                || !(err4 <= maxErr4))
-                                            {
-                                                // Try flushing the denormals
-                                                if (hasDenormals)
-                                                {
-                                                    maxErr1 += 4 * FLT_MIN;
-                                                    maxErr2 += 4 * FLT_MIN;
-                                                    maxErr3 += 4 * FLT_MIN;
-                                                    maxErr4 += 4 * FLT_MIN;
-                                                    maxPixel =
-                                                        sample_image_pixel_float(
-                                                            imagePtr, imageInfo,
-                                                            xOffsetValues[j],
-                                                            yOffsetValues[j],
-                                                            zOffsetValues[j],
-                                                            imageSampler,
-                                                            expected, 0, NULL,
-                                                            lod);
-                                                    err1 =
-                                                        ABS_ERROR(resultPtr[0],
-                                                                  expected[0]);
-                                                    err2 =
-                                                        ABS_ERROR(resultPtr[1],
-                                                                  expected[1]);
-                                                    err3 =
-                                                        ABS_ERROR(resultPtr[2],
-                                                                  expected[2]);
-                                                    err4 =
-                                                        ABS_ERROR(resultPtr[3],
-                                                                  expected[3]);
-                                                }
-                                            }
-                                            if (!(err1 <= maxErr1)
-                                                || !(err2 <= maxErr2)
-                                                || !(err3 <= maxErr3)
-                                                || !(err4 <= maxErr4))
-                                            {
-                                                log_error(
-                                                    "FAILED norm_offsets: %g , "
-                                                    "%g , %g:\n",
-                                                    norm_offset_x,
-                                                    norm_offset_y,
-                                                    norm_offset_z);
-                                                float tempOut[4];
-                                                shouldReturn |=
-                                                    determine_validation_error_offset<
-                                                        float>(
-                                                        imagePtr, imageInfo,
-                                                        imageSampler, resultPtr,
-                                                        expected, error,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z, j,
-                                                        numTries, numClamped,
-                                                        true, lod);
-                                                log_error("Step by step:\n");
-                                                FloatPixel temp =
-                                                    sample_image_pixel_float_offset(
-                                                        imagePtr, imageInfo,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z,
-                                                        imageSampler, tempOut,
-                                                        1 /*verbose*/,
-                                                        &hasDenormals, lod);
-                                                log_error(
-                                                    "\tulps: %2.2f, %2.2f, "
-                                                    "%2.2f, %2.2f  (max "
-                                                    "allowed: %2.2f)\n\n",
-                                                    Ulp_Error(resultPtr[0],
-                                                              expected[0]),
-                                                    Ulp_Error(resultPtr[1],
-                                                              expected[1]),
-                                                    Ulp_Error(resultPtr[2],
-                                                              expected[2]),
-                                                    Ulp_Error(resultPtr[3],
-                                                              expected[3]),
-                                                    Ulp_Error(
-                                                        MAKE_HEX_FLOAT(
-                                                            0x1.000002p0f,
-                                                            0x1000002L, -24)
-                                                            + maxErr,
-                                                        MAKE_HEX_FLOAT(
-                                                            0x1.000002p0f,
-                                                            0x1000002L, -24)));
-                                            }
-                                            else
-                                            {
-                                                log_error(
-                                                    "Test error: we should "
-                                                    "have detected this "
-                                                    "passing above.\n");
-                                            }
-                                        } // norm_offset_z
-                                    } // norm_offset_y
-                                } // norm_offset_x
-                                if (shouldReturn) return 1;
-                            } // if (!found_pixel)
-                            resultPtr += 4;
-                        }
-                    }
-                }
-            }
-            /*
-             * UINT output type
-             */
-            else if (outputType == kUInt)
-            {
-                // Validate unsigned integer results
-                unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
-                unsigned int expected[4];
-                float error;
-                for (size_t z = 0, j = 0; z < depth_lod; z++)
-                {
-                    for (size_t y = 0; y < height_lod; y++)
-                    {
-                        for (size_t x = 0; x < width_lod; x++, j++)
-                        {
-                            // Step 1: go through and see if the results verify
-                            // for the pixel For the normalized case on a GPU we
-                            // put in offsets to the X, Y and Z to see if we
-                            // land on the right pixel. This addresses the
-                            // significant inaccuracy in GPU normalization in
-                            // OpenCL 1.0.
-                            int checkOnlyOnePixel = 0;
-                            int found_pixel = 0;
-                            for (float norm_offset_x = -NORM_OFFSET;
-                                 norm_offset_x <= NORM_OFFSET && !found_pixel
-                                 && !checkOnlyOnePixel;
-                                 norm_offset_x += NORM_OFFSET)
-                            {
-                                for (float norm_offset_y = -NORM_OFFSET;
-                                     norm_offset_y <= NORM_OFFSET
-                                     && !found_pixel && !checkOnlyOnePixel;
-                                     norm_offset_y += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_z = -NORM_OFFSET;
-                                         norm_offset_z <= NORM_OFFSET
-                                         && !found_pixel && !checkOnlyOnePixel;
-                                         norm_offset_z += NORM_OFFSET)
-                                    {
-                                        // If we are not on a GPU, or we are not
-                                        // normalized, then only test with
-                                        // offsets (0.0, 0.0) E.g., test one
-                                        // pixel.
-                                        if (!imageSampler->normalized_coords
-                                            || gDeviceType != CL_DEVICE_TYPE_GPU
-                                            || NORM_OFFSET == 0)
-                                        {
-                                            norm_offset_x = 0.0f;
-                                            norm_offset_y = 0.0f;
-                                            norm_offset_z = 0.0f;
-                                            checkOnlyOnePixel = 1;
-                                        }
-                                        sample_image_pixel_offset<unsigned int>(
-                                            imagePtr, imageInfo,
-                                            xOffsetValues[j], yOffsetValues[j],
-                                            zOffsetValues[j], norm_offset_x,
-                                            norm_offset_y, norm_offset_z,
-                                            imageSampler, expected, lod);
-                                        error = errMax(
-                                            errMax(abs_diff_uint(expected[0],
-                                                                 resultPtr[0]),
-                                                   abs_diff_uint(expected[1],
-                                                                 resultPtr[1])),
-                                            errMax(
-                                                abs_diff_uint(expected[2],
-                                                              resultPtr[2]),
-                                                abs_diff_uint(expected[3],
-                                                              resultPtr[3])));
-                                        if (error < MAX_ERR) found_pixel = 1;
-                                    } // norm_offset_z
-                                } // norm_offset_y
-                            } // norm_offset_x
-                            // Step 2: If we did not find a match, then print
-                            // out debugging info.
-                            if (!found_pixel)
-                            {
-                                // For the normalized case on a GPU we put in
-                                // offsets to the X and Y to see if we land on
-                                // the right pixel. This addresses the
-                                // significant inaccuracy in GPU normalization
-                                // in OpenCL 1.0.
-                                checkOnlyOnePixel = 0;
-                                int shouldReturn = 0;
-                                for (float norm_offset_x = -NORM_OFFSET;
-                                     norm_offset_x <= NORM_OFFSET
-                                     && !checkOnlyOnePixel;
-                                     norm_offset_x += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_y = -NORM_OFFSET;
-                                         norm_offset_y <= NORM_OFFSET
-                                         && !checkOnlyOnePixel;
-                                         norm_offset_y += NORM_OFFSET)
-                                    {
-                                        for (float norm_offset_z = -NORM_OFFSET;
-                                             norm_offset_z <= NORM_OFFSET
-                                             && !checkOnlyOnePixel;
-                                             norm_offset_z += NORM_OFFSET)
-                                        {
-                                            // If we are not on a GPU, or we are
-                                            // not normalized, then only test
-                                            // with offsets (0.0, 0.0) E.g.,
-                                            // test one pixel.
-                                            if (!imageSampler->normalized_coords
-                                                || gDeviceType
-                                                    != CL_DEVICE_TYPE_GPU
-                                                || NORM_OFFSET == 0)
-                                            {
-                                                norm_offset_x = 0.0f;
-                                                norm_offset_y = 0.0f;
-                                                norm_offset_z = 0.0f;
-                                                checkOnlyOnePixel = 1;
-                                            }
-                                            sample_image_pixel_offset<
-                                                unsigned int>(
-                                                imagePtr, imageInfo,
-                                                xOffsetValues[j],
-                                                yOffsetValues[j],
-                                                zOffsetValues[j], norm_offset_x,
-                                                norm_offset_y, norm_offset_z,
-                                                imageSampler, expected, lod);
-                                            error = errMax(
-                                                errMax(
-                                                    abs_diff_uint(expected[0],
-                                                                  resultPtr[0]),
-                                                    abs_diff_uint(
-                                                        expected[1],
-                                                        resultPtr[1])),
-                                                errMax(
-                                                    abs_diff_uint(expected[2],
-                                                                  resultPtr[2]),
-                                                    abs_diff_uint(
-                                                        expected[3],
-                                                        resultPtr[3])));
-                                            if (error > MAX_ERR)
-                                            {
-                                                log_error(
-                                                    "FAILED norm_offsets: %g , "
-                                                    "%g , %g:\n",
-                                                    norm_offset_x,
-                                                    norm_offset_y,
-                                                    norm_offset_z);
-                                                shouldReturn |=
-                                                    determine_validation_error_offset<
-                                                        unsigned int>(
-                                                        imagePtr, imageInfo,
-                                                        imageSampler, resultPtr,
-                                                        expected, error,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z, j,
-                                                        numTries, numClamped,
-                                                        false, lod);
-                                            }
-                                            else
-                                            {
-                                                log_error(
-                                                    "Test error: we should "
-                                                    "have detected this "
-                                                    "passing above.\n");
-                                            }
-                                        } // norm_offset_z
-                                    } // norm_offset_y
-                                } // norm_offset_x
-                                if (shouldReturn) return 1;
-                            } // if (!found_pixel)
-                            resultPtr += 4;
-                        }
-                    }
-                }
-            }
-            else
-            /*
-             * INT output type
-             */
-            {
-                // Validate integer results
-                int *resultPtr = (int *)(char *)resultValues;
-                int expected[4];
-                float error;
-                for (size_t z = 0, j = 0; z < depth_lod; z++)
-                {
-                    for (size_t y = 0; y < height_lod; y++)
-                    {
-                        for (size_t x = 0; x < width_lod; x++, j++)
-                        {
-                            // Step 1: go through and see if the results verify
-                            // for the pixel For the normalized case on a GPU we
-                            // put in offsets to the X, Y and Z to see if we
-                            // land on the right pixel. This addresses the
-                            // significant inaccuracy in GPU normalization in
-                            // OpenCL 1.0.
-                            int checkOnlyOnePixel = 0;
-                            int found_pixel = 0;
-                            for (float norm_offset_x = -NORM_OFFSET;
-                                 norm_offset_x <= NORM_OFFSET && !found_pixel
-                                 && !checkOnlyOnePixel;
-                                 norm_offset_x += NORM_OFFSET)
-                            {
-                                for (float norm_offset_y = -NORM_OFFSET;
-                                     norm_offset_y <= NORM_OFFSET
-                                     && !found_pixel && !checkOnlyOnePixel;
-                                     norm_offset_y += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_z = -NORM_OFFSET;
-                                         norm_offset_z <= NORM_OFFSET
-                                         && !found_pixel && !checkOnlyOnePixel;
-                                         norm_offset_z += NORM_OFFSET)
-                                    {
-                                        // If we are not on a GPU, or we are not
-                                        // normalized, then only test with
-                                        // offsets (0.0, 0.0) E.g., test one
-                                        // pixel.
-                                        if (!imageSampler->normalized_coords
-                                            || gDeviceType != CL_DEVICE_TYPE_GPU
-                                            || NORM_OFFSET == 0)
-                                        {
-                                            norm_offset_x = 0.0f;
-                                            norm_offset_y = 0.0f;
-                                            norm_offset_z = 0.0f;
-                                            checkOnlyOnePixel = 1;
-                                        }
-                                        sample_image_pixel_offset<int>(
-                                            imagePtr, imageInfo,
-                                            xOffsetValues[j], yOffsetValues[j],
-                                            zOffsetValues[j], norm_offset_x,
-                                            norm_offset_y, norm_offset_z,
-                                            imageSampler, expected, lod);
-                                        error = errMax(
-                                            errMax(abs_diff_int(expected[0],
-                                                                resultPtr[0]),
-                                                   abs_diff_int(expected[1],
-                                                                resultPtr[1])),
-                                            errMax(abs_diff_int(expected[2],
-                                                                resultPtr[2]),
-                                                   abs_diff_int(expected[3],
-                                                                resultPtr[3])));
-                                        if (error < MAX_ERR) found_pixel = 1;
-                                    } // norm_offset_z
-                                } // norm_offset_y
-                            } // norm_offset_x
-                            // Step 2: If we did not find a match, then print
-                            // out debugging info.
-                            if (!found_pixel)
-                            {
-                                // For the normalized case on a GPU we put in
-                                // offsets to the X and Y to see if we land on
-                                // the right pixel. This addresses the
-                                // significant inaccuracy in GPU normalization
-                                // in OpenCL 1.0.
-                                checkOnlyOnePixel = 0;
-                                int shouldReturn = 0;
-                                for (float norm_offset_x = -NORM_OFFSET;
-                                     norm_offset_x <= NORM_OFFSET
-                                     && !checkOnlyOnePixel;
-                                     norm_offset_x += NORM_OFFSET)
-                                {
-                                    for (float norm_offset_y = -NORM_OFFSET;
-                                         norm_offset_y <= NORM_OFFSET
-                                         && !checkOnlyOnePixel;
-                                         norm_offset_y += NORM_OFFSET)
-                                    {
-                                        for (float norm_offset_z = -NORM_OFFSET;
-                                             norm_offset_z <= NORM_OFFSET
-                                             && !checkOnlyOnePixel;
-                                             norm_offset_z += NORM_OFFSET)
-                                        {
-                                            // If we are not on a GPU, or we are
-                                            // not normalized, then only test
-                                            // with offsets (0.0, 0.0) E.g.,
-                                            // test one pixel.
-                                            if (!imageSampler->normalized_coords
-                                                || gDeviceType
-                                                    != CL_DEVICE_TYPE_GPU
-                                                || NORM_OFFSET == 0
-                                                || NORM_OFFSET == 0
-                                                || NORM_OFFSET == 0)
-                                            {
-                                                norm_offset_x = 0.0f;
-                                                norm_offset_y = 0.0f;
-                                                norm_offset_z = 0.0f;
-                                                checkOnlyOnePixel = 1;
-                                            }
-                                            sample_image_pixel_offset<int>(
-                                                imagePtr, imageInfo,
-                                                xOffsetValues[j],
-                                                yOffsetValues[j],
-                                                zOffsetValues[j], norm_offset_x,
-                                                norm_offset_y, norm_offset_z,
-                                                imageSampler, expected, lod);
-                                            error = errMax(
-                                                errMax(
-                                                    abs_diff_int(expected[0],
-                                                                 resultPtr[0]),
-                                                    abs_diff_int(expected[1],
-                                                                 resultPtr[1])),
-                                                errMax(
-                                                    abs_diff_int(expected[2],
-                                                                 resultPtr[2]),
-                                                    abs_diff_int(
-                                                        expected[3],
-                                                        resultPtr[3])));
-                                            if (error > MAX_ERR)
-                                            {
-                                                log_error(
-                                                    "FAILED norm_offsets: %g , "
-                                                    "%g , %g:\n",
-                                                    norm_offset_x,
-                                                    norm_offset_y,
-                                                    norm_offset_z);
-                                                shouldReturn |=
-                                                    determine_validation_error_offset<
-                                                        int>(
-                                                        imagePtr, imageInfo,
-                                                        imageSampler, resultPtr,
-                                                        expected, error,
-                                                        xOffsetValues[j],
-                                                        yOffsetValues[j],
-                                                        zOffsetValues[j],
-                                                        norm_offset_x,
-                                                        norm_offset_y,
-                                                        norm_offset_z, j,
-                                                        numTries, numClamped,
-                                                        false, lod);
-                                            }
-                                            else
-                                            {
-                                                log_error(
-                                                    "Test error: we should "
-                                                    "have detected this "
-                                                    "passing above.\n");
-                                            }
-                                        } // norm_offset_z
-                                    } // norm_offset_y
-                                } // norm_offset_x
-                                if (shouldReturn) return 1;
-                            } // if (!found_pixel)
-                            resultPtr += 4;
-                        }
-                    }
-                }
-            }
-        }
-        {
-            nextLevelOffset += width_lod * height_lod * depth_lod
-                * get_pixel_size(imageInfo->format);
-            width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
-            height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
-            depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1;
-        }
-    }
-    return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
\ No newline at end of file
diff --git a/test_conformance/images/kernel_read_write/test_common.h b/test_conformance/images/kernel_read_write/test_common.h
index e7ecbe0..1a1a8a1 100644
--- a/test_conformance/images/kernel_read_write/test_common.h
+++ b/test_conformance/images/kernel_read_write/test_common.h
@@ -1,231 +1,7 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
 #include "../testBase.h"
 #define ABS_ERROR(result, expected) (fabs(expected - result))
-#define CLAMP(_val, _min, _max)                                                \
-    ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val))
-#define MAX_ERR 0.005f
-#define MAX_TRIES 1
-#define MAX_CLAMPED 1
 extern cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool test_mipmaps, cl_int *error);
-extern void read_image_pixel_float(void *imageData, image_descriptor *imageInfo,
-                                   int x, int y, int z, float *outData);
-extern bool gExtraValidateInfo;
-extern bool gDisableOffsets;
-extern bool gUseKernelSamplers;
-extern cl_mem_flags gMemFlagsToUse;
-extern int gtestTypesToRun;
-extern uint64_t gRoundingStartValue;
-extern bool gPrintOptions;
-extern int test_read_image(cl_context context, cl_command_queue queue,
-                           cl_kernel kernel, image_descriptor *imageInfo,
-                           image_sampler_data *imageSampler,
-                           bool useFloatCoords, ExplicitType outputType,
-                           MTdata d);
-extern void InitFloatCoordsCommon(image_descriptor *imageInfo,
-                                  image_sampler_data *imageSampler,
-                                  float *xOffsets, float *yOffsets,
-                                  float *zOffsets, float xfract, float yfract,
-                                  float zfract, int normalized_coords, MTdata d,
-                                  int lod);
-template <class T>
-int determine_validation_error_offset(
-    void *imagePtr, image_descriptor *imageInfo,
-    image_sampler_data *imageSampler, T *resultPtr, T *expected, float error,
-    float x, float y, float z, float xAddressOffset, float yAddressOffset,
-    float zAddressOffset, size_t j, int &numTries, int &numClamped,
-    bool printAsFloat, int lod)
-    int actualX, actualY, actualZ;
-    int found = debug_find_pixel_in_image(imagePtr, imageInfo, resultPtr,
-                                          &actualX, &actualY, &actualZ, lod);
-    bool clampingErr = false, clamped = false, otherClampingBug = false;
-    int clampedX, clampedY, clampedZ;
-    size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height,
-           imageDepth = imageInfo->depth;
-    clamped = get_integer_coords_offset(x, y, z, xAddressOffset, yAddressOffset,
-                                        zAddressOffset, imageWidth, imageHeight,
-                                        imageDepth, imageSampler, imageInfo,
-                                        clampedX, clampedY, clampedZ);
-    if (found)
-    {
-        // Is it a clamping bug?
-        if (clamped && clampedX == actualX && clampedY == actualY
-            && clampedZ == actualZ)
-        {
-            if ((--numClamped) == 0)
-            {
-                if (printAsFloat)
-                {
-                    log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did "
-                              "not validate! Expected (%g,%g,%g,%g), got "
-                              "(%g,%g,%g,%g), error of %g\n",
-                              j, x, x, y, y, z, z, (float)expected[0],
-                              (float)expected[1], (float)expected[2],
-                              (float)expected[3], (float)resultPtr[0],
-                              (float)resultPtr[1], (float)resultPtr[2],
-                              (float)resultPtr[3], error);
-                }
-                else
-                {
-                    log_error(
-                        "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
-                        "validate! Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n",
-                        j, x, x, y, y, z, z, (int)expected[0], (int)expected[1],
-                        (int)expected[2], (int)expected[3], (int)resultPtr[0],
-                        (int)resultPtr[1], (int)resultPtr[2],
-                        (int)resultPtr[3]);
-                }
-                log_error("ERROR: TEST FAILED: Read is erroneously clamping "
-                          "coordinates!\n");
-                return -1;
-            }
-            clampingErr = true;
-            otherClampingBug = true;
-        }
-    }
-    if (clamped && !otherClampingBug)
-    {
-        // If we are in clamp-to-edge mode and we're getting zeroes, it's
-        // possible we're getting border erroneously
-        if (resultPtr[0] == 0 && resultPtr[1] == 0 && resultPtr[2] == 0
-            && resultPtr[3] == 0)
-        {
-            if ((--numClamped) == 0)
-            {
-                if (printAsFloat)
-                {
-                    log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did "
-                              "not validate! Expected (%g,%g,%g,%g), got "
-                              "(%g,%g,%g,%g), error of %g\n",
-                              j, x, x, y, y, z, z, (float)expected[0],
-                              (float)expected[1], (float)expected[2],
-                              (float)expected[3], (float)resultPtr[0],
-                              (float)resultPtr[1], (float)resultPtr[2],
-                              (float)resultPtr[3], error);
-                }
-                else
-                {
-                    log_error(
-                        "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
-                        "validate! Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n",
-                        j, x, x, y, y, z, z, (int)expected[0], (int)expected[1],
-                        (int)expected[2], (int)expected[3], (int)resultPtr[0],
-                        (int)resultPtr[1], (int)resultPtr[2],
-                        (int)resultPtr[3]);
-                }
-                log_error("ERROR: TEST FAILED: Clamping is erroneously "
-                          "returning border color!\n");
-                return -1;
-            }
-            clampingErr = true;
-        }
-    }
-    if (!clampingErr)
-    {
-        /*        if( clamped && ( (int)x + (int)xOffsetValues[ j ] < 0 ||
-         (int)y + (int)yOffsetValues[ j ] < 0 ) )
-         {
-         log_error( "NEGATIVE COORDINATE ERROR\n" );
-         return -1;
-         }
-         */
-        if (true) // gExtraValidateInfo )
-        {
-            if (printAsFloat)
-            {
-                log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
-                          "validate!\n\tExpected (%g,%g,%g,%g),\n\t     got "
-                          "(%g,%g,%g,%g), error of %g\n",
-                          j, x, x, y, y, z, z, (float)expected[0],
-                          (float)expected[1], (float)expected[2],
-                          (float)expected[3], (float)resultPtr[0],
-                          (float)resultPtr[1], (float)resultPtr[2],
-                          (float)resultPtr[3], error);
-            }
-            else
-            {
-                log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
-                          "validate!\n\tExpected (%x,%x,%x,%x),\n\t     got "
-                          "(%x,%x,%x,%x)\n",
-                          j, x, x, y, y, z, z, (int)expected[0],
-                          (int)expected[1], (int)expected[2], (int)expected[3],
-                          (int)resultPtr[0], (int)resultPtr[1],
-                          (int)resultPtr[2], (int)resultPtr[3]);
-            }
-            log_error(
-                "Integer coords resolve to %d,%d,%d   with img size %d,%d,%d\n",
-                clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight,
-                (int)imageDepth);
-            if (printAsFloat && gExtraValidateInfo)
-            {
-                log_error("\nNearby values:\n");
-                for (int zOff = -1; zOff <= 1; zOff++)
-                {
-                    for (int yOff = -1; yOff <= 1; yOff++)
-                    {
-                        float top[4], real[4], bot[4];
-                        read_image_pixel_float(imagePtr, imageInfo,
-                                               clampedX - 1, clampedY + yOff,
-                                               clampedZ + zOff, top);
-                        read_image_pixel_float(imagePtr, imageInfo, clampedX,
-                                               clampedY + yOff, clampedZ + zOff,
-                                               real);
-                        read_image_pixel_float(imagePtr, imageInfo,
-                                               clampedX + 1, clampedY + yOff,
-                                               clampedZ + zOff, bot);
-                        log_error("\t(%g,%g,%g,%g)", top[0], top[1], top[2],
-                                  top[3]);
-                        log_error(" (%g,%g,%g,%g)", real[0], real[1], real[2],
-                                  real[3]);
-                        log_error(" (%g,%g,%g,%g)\n", bot[0], bot[1], bot[2],
-                                  bot[3]);
-                    }
-                }
-            }
-            //        }
-            //        else
-            //            log_error( "\n" );
-            if (imageSampler->filter_mode != CL_FILTER_LINEAR)
-            {
-                if (found)
-                    log_error(
-                        "\tValue really found in image at %d,%d,%d (%s)\n",
-                        actualX, actualY, actualZ,
-                        (found > 1) ? "NOT unique!!" : "unique");
-                else
-                    log_error("\tValue not actually found in image\n");
-            }
-            log_error("\n");
-        }
-        numClamped = -1; // We force the clamped counter to never work
-        if ((--numTries) == 0) return -1;
-    }
-    return 0;
diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp
index 03ca959..0b7d424 100644
--- a/test_conformance/images/kernel_read_write/test_iterations.cpp
+++ b/test_conformance/images/kernel_read_write/test_iterations.cpp
@@ -1,5 +1,5 @@
-// Copyright (c) 2017, 2021 The Khronos Group Inc.
+// Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -22,7 +22,20 @@
     #include <setjmp.h>
-extern bool gTestImage2DFromBuffer;
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestImage2DFromBuffer, gTestMipmaps;
+extern bool            gUseKernelSamplers;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+extern cl_mem_flags gMemFlagsToUse;
+extern int gtestTypesToRun;
+extern bool gDeviceLt20;
+#define MAX_TRIES               1
+#define MAX_CLAMPED             1
 // Utility function to clamp down image sizes for certain tests to avoid
 // using too much memory.
@@ -72,6 +85,8 @@
 static const char *offsetSource =
 "   int offset = tidY*get_image_width(input) + tidX;\n";
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                            int x, int y, int z, float *outData );
 template <class T> int determine_validation_error( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
                                                 T *resultPtr, T * expected, float error,
                                 float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod = 0 )
@@ -275,6 +290,8 @@
     return 0;
+#define CLAMP( _val, _min, _max )           ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val))
 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d )
     size_t i = 0;
@@ -387,6 +404,9 @@
+#ifndef MAX
+    #define MAX( _a, _b )           ((_a) > (_b) ? (_a) : (_b))
 int validate_image_2D_depth_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
                                                         ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
@@ -1154,11 +1174,8 @@
             if ( IsFloatSubnormal( expected[j] ) && actual[j] == 0.0f )
-            if (expected[j] != actual[j])
-            {
-                pass = false;
-                break;
-            }
+            pass = false;
+            break;
     return pass;
@@ -1176,11 +1193,8 @@
             if ( is_half_denorm( expected[j] ) && is_half_zero( actual[j] ) )
-            if (expected[j] != actual[j])
-            {
-                pass = false;
-                break;
-            }
+            pass = false;
+            break;
     return pass;
@@ -1414,20 +1428,11 @@
     if( gDebugTrace )
         log_info( " - Creating kernel arguments...\n" );
-    xOffsets =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_float) * imageInfo->width * imageInfo->height,
-                       xOffsetValues, &error);
+    xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height, xOffsetValues, &error );
     test_error( error, "Unable to create x offset buffer" );
-    yOffsets =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_float) * imageInfo->width * imageInfo->height,
-                       yOffsetValues, &error);
+    yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height, yOffsetValues, &error );
     test_error( error, "Unable to create y offset buffer" );
-    results = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                             get_explicit_type_size(outputType) * 4
-                                 * imageInfo->width * imageInfo->height,
-                             NULL, &error);
+    results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->height, NULL, &error );
     test_error( error, "Unable to create result buffer" );
     // Create sampler to use
@@ -1557,11 +1562,8 @@
     return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
-int test_read_image_set_2D(cl_device_id device, cl_context context,
-                           cl_command_queue queue,
-                           const cl_image_format *format,
-                           image_sampler_data *imageSampler, bool floatCoords,
-                           ExplicitType outputType)
+int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                        bool floatCoords, ExplicitType outputType )
     char programSrc[10240];
     const char *ptr;
@@ -1661,8 +1663,7 @@
             gTestMipmaps?", lod":" ");
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
     if( gTestSmallImages )
diff --git a/test_conformance/images/kernel_read_write/test_loops.cpp b/test_conformance/images/kernel_read_write/test_loops.cpp
index 795a9ed..0cf70a8 100644
--- a/test_conformance/images/kernel_read_write/test_loops.cpp
+++ b/test_conformance/images/kernel_read_write/test_loops.cpp
@@ -16,61 +16,43 @@
 #include "../testBase.h"
 #include "../common.h"
-extern cl_filter_mode gFilterModeToUse;
+extern cl_filter_mode     gFilterModeToUse;
 extern cl_addressing_mode gAddressModeToUse;
-extern int gNormalizedModeToUse;
-extern int gTypesToTest;
-extern int gtestTypesToRun;
+extern int                gTypesToTest;
+extern int                gNormalizedModeToUse;
+extern cl_channel_type      gChannelTypeToUse;
+extern cl_channel_order      gChannelOrderToUse;
-extern int test_read_image_set_1D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  image_sampler_data *imageSampler,
-                                  bool floatCoords, ExplicitType outputType);
-extern int test_read_image_set_2D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  image_sampler_data *imageSampler,
-                                  bool floatCoords, ExplicitType outputType);
-extern int test_read_image_set_3D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  image_sampler_data *imageSampler,
-                                  bool floatCoords, ExplicitType outputType);
-extern int test_read_image_set_1D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        const cl_image_format *format,
-                                        image_sampler_data *imageSampler,
-                                        bool floatCoords,
-                                        ExplicitType outputType);
-extern int test_read_image_set_2D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        const cl_image_format *format,
-                                        image_sampler_data *imageSampler,
-                                        bool floatCoords,
-                                        ExplicitType outputType);
+extern bool gDebugTrace;
+extern bool gTestMipmaps;
-int test_read_image_type(cl_device_id device, cl_context context,
-                         cl_command_queue queue, const cl_image_format *format,
-                         bool floatCoords, image_sampler_data *imageSampler,
-                         ExplicitType outputType, cl_mem_object_type imageType)
+extern int  gtestTypesToRun;
+extern bool gDeviceLt20;
+extern int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue,  cl_image_format *format, image_sampler_data *imageSampler,
+                                  bool floatCoords, ExplicitType outputType );
+extern int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                                  bool floatCoords, ExplicitType outputType );
+extern int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                                  bool floatCoords, ExplicitType outputType );
+extern int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                                        bool floatCoords, ExplicitType outputType );
+extern int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                                        bool floatCoords, ExplicitType outputType );
+int test_read_image_type( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, bool floatCoords,
+                         image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType )
     int ret = 0;
     cl_addressing_mode *addressModes = NULL;
-    // The sampler-less read image functions behave exactly as the corresponding
-    // read image functions described in section that take integer
-    // coordinates and a sampler with filter mode set to CLK_FILTER_NEAREST,
-    // normalized coordinates set to CLK_NORMALIZED_COORDS_FALSE and addressing
-    // mode to CLK_ADDRESS_NONE
-    cl_addressing_mode addressModes_rw[] = { CL_ADDRESS_NONE,
-                                             (cl_addressing_mode)-1 };
-    cl_addressing_mode addressModes_ro[] = {
-        CL_ADDRESS_REPEAT, CL_ADDRESS_MIRRORED_REPEAT, (cl_addressing_mode)-1
-    };
+    // The sampler-less read image functions behave exactly as the corresponding read image functions
+    // described in section that take integer coordinates and a sampler with filter mode set to
+    // CLK_FILTER_NEAREST, normalized coordinates set to CLK_NORMALIZED_COORDS_FALSE and addressing mode to CLK_ADDRESS_NONE
+    cl_addressing_mode addressModes_rw[] = { CL_ADDRESS_NONE, (cl_addressing_mode)-1 };
+    cl_addressing_mode addressModes_ro[] = { /* CL_ADDRESS_CLAMP_NONE,*/ CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP, CL_ADDRESS_REPEAT, CL_ADDRESS_MIRRORED_REPEAT, (cl_addressing_mode)-1 };
-    if (gtestTypesToRun & kReadWriteTests)
+    if(gtestTypesToRun & kReadWriteTests)
         addressModes = addressModes_rw;
@@ -79,45 +61,39 @@
         addressModes = addressModes_ro;
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
     // According to the OpenCL specification, we do not guarantee the precision
     // of operations for linear filtering on the GPU.  We do not test linear
     // filtering for the CL_RGB CL_UNORM_INT_101010 image format; however, we
     // test it internally for a set of other image formats.
-    if ((gDeviceType == CL_DEVICE_TYPE_GPU)
-        && (imageSampler->filter_mode == CL_FILTER_LINEAR)
-        && (format->image_channel_order == CL_RGB)
-        && (format->image_channel_data_type == CL_UNORM_INT_101010))
+    if ((gDeviceType == CL_DEVICE_TYPE_GPU) &&
+        (imageSampler->filter_mode == CL_FILTER_LINEAR) &&
+        (format->image_channel_order == CL_RGB) &&
+        (format->image_channel_data_type == CL_UNORM_INT_101010))
-        log_info("--- Skipping CL_RGB CL_UNORM_INT_101010 format with "
-                 "CL_FILTER_LINEAR on GPU.\n");
+        log_info("--- Skipping CL_RGB CL_UNORM_INT_101010 format with CL_FILTER_LINEAR on GPU.\n");
         return 0;
-    for (int adMode = 0; addressModes[adMode] != (cl_addressing_mode)-1;
-         adMode++)
+    for( int adMode = 0; addressModes[ adMode ] != (cl_addressing_mode)-1; adMode++ )
-        imageSampler->addressing_mode = addressModes[adMode];
+        imageSampler->addressing_mode = addressModes[ adMode ];
-        if ((addressModes[adMode] == CL_ADDRESS_REPEAT
-             || addressModes[adMode] == CL_ADDRESS_MIRRORED_REPEAT)
-            && !(imageSampler->normalized_coords))
+        if( (addressModes[ adMode ] == CL_ADDRESS_REPEAT || addressModes[ adMode ] == CL_ADDRESS_MIRRORED_REPEAT) && !( imageSampler->normalized_coords ) )
             continue; // Repeat doesn't make sense for non-normalized coords
         // Use this run if we were told to only run a certain filter mode
-        if (gAddressModeToUse != (cl_addressing_mode)-1
-            && imageSampler->addressing_mode != gAddressModeToUse)
+        if( gAddressModeToUse != (cl_addressing_mode)-1 && imageSampler->addressing_mode != gAddressModeToUse )
          Remove redundant check to see if workaround still necessary
-         // Check added in because this case was leaking through causing a crash
-         on CPU if( ! imageSampler->normalized_coords &&
-         imageSampler->addressing_mode == CL_ADDRESS_REPEAT ) continue; //repeat
-         mode requires normalized coordinates
+         // Check added in because this case was leaking through causing a crash on CPU
+         if( ! imageSampler->normalized_coords && imageSampler->addressing_mode == CL_ADDRESS_REPEAT )
+         continue;       //repeat mode requires normalized coordinates
-        print_read_header(format, imageSampler, false);
+        print_read_header( format, imageSampler, false );
@@ -125,37 +101,27 @@
         switch (imageType)
             case CL_MEM_OBJECT_IMAGE1D:
-                retCode = test_read_image_set_1D(device, context, queue, format,
-                                                 imageSampler, floatCoords,
-                                                 outputType);
+                retCode = test_read_image_set_1D( device, context, queue, format, imageSampler, floatCoords, outputType );
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                retCode = test_read_image_set_1D_array(device, context, queue,
-                                                       format, imageSampler,
-                                                       floatCoords, outputType);
+                retCode = test_read_image_set_1D_array( device, context, queue, format, imageSampler, floatCoords, outputType );
             case CL_MEM_OBJECT_IMAGE2D:
-                retCode = test_read_image_set_2D(device, context, queue, format,
-                                                 imageSampler, floatCoords,
-                                                 outputType);
+                retCode = test_read_image_set_2D( device, context, queue, format, imageSampler, floatCoords, outputType );
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                retCode = test_read_image_set_2D_array(device, context, queue,
-                                                       format, imageSampler,
-                                                       floatCoords, outputType);
+                retCode = test_read_image_set_2D_array( device, context, queue, format, imageSampler, floatCoords, outputType );
             case CL_MEM_OBJECT_IMAGE3D:
-                retCode = test_read_image_set_3D(device, context, queue, format,
-                                                 imageSampler, floatCoords,
-                                                 outputType);
+                retCode = test_read_image_set_3D( device, context, queue, format, imageSampler, floatCoords, outputType );
-        if (retCode != 0)
+        if( retCode != 0 )
-            log_error("FAILED: ");
-            print_read_header(format, imageSampler, true);
-            log_info("\n");
+            log_error( "FAILED: " );
+            print_read_header( format, imageSampler, true );
+            log_info( "\n" );
         ret |= retCode;
@@ -163,13 +129,8 @@
     return ret;
-int test_read_image_formats(cl_device_id device, cl_context context,
-                            cl_command_queue queue,
-                            const std::vector<cl_image_format> &formatList,
-                            const std::vector<bool> &filterFlags,
-                            image_sampler_data *imageSampler,
-                            ExplicitType outputType,
-                            cl_mem_object_type imageType)
+int test_read_image_formats( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
+                            image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType )
     int ret = 0;
     bool flipFlop[2] = { false, true };
@@ -177,52 +138,41 @@
     // Use this run if we were told to only run a certain filter mode
-    if (gFilterModeToUse != (cl_filter_mode)-1
-        && imageSampler->filter_mode != gFilterModeToUse)
+    if( gFilterModeToUse != (cl_filter_mode)-1 && imageSampler->filter_mode != gFilterModeToUse )
         return 0;
     // Test normalized/non-normalized
-    for (normalizedIdx = 0; normalizedIdx < 2; normalizedIdx++)
+    for( normalizedIdx = 0; normalizedIdx < 2; normalizedIdx++ )
-        imageSampler->normalized_coords = flipFlop[normalizedIdx];
-        if (gNormalizedModeToUse != 7
-            && gNormalizedModeToUse != (int)imageSampler->normalized_coords)
+        imageSampler->normalized_coords = flipFlop[ normalizedIdx ];
+        if( gNormalizedModeToUse != 7 && gNormalizedModeToUse != (int)imageSampler->normalized_coords )
-        for (floatCoordIdx = 0; floatCoordIdx < 2; floatCoordIdx++)
+        for( floatCoordIdx = 0; floatCoordIdx < 2; floatCoordIdx++ )
-            // Checks added in because this case was leaking through causing a
-            // crash on CPU
-            if (!flipFlop[floatCoordIdx])
-                if (imageSampler->filter_mode != CL_FILTER_NEAREST
-                    || // integer coords can only be used with nearest
-                    flipFlop[normalizedIdx]) // Normalized integer coords makes
-                                             // no sense (they'd all be zero)
+            // Checks added in because this case was leaking through causing a crash on CPU
+            if( !flipFlop[ floatCoordIdx ] )
+                if( imageSampler->filter_mode != CL_FILTER_NEAREST      ||  // integer coords can only be used with nearest
+                   flipFlop[ normalizedIdx ])                               // Normalized integer coords makes no sense (they'd all be zero)
-            if (flipFlop[floatCoordIdx] && (gtestTypesToRun & kReadWriteTests))
+            if( flipFlop[ floatCoordIdx ] && (gtestTypesToRun & kReadWriteTests))
                 // sampler-less read in read_write tests run only integer coord
-            log_info("read_image (%s coords, %s results) "
-                     "*****************************\n",
-                     flipFlop[floatCoordIdx] ? (imageSampler->normalized_coords
-                                                    ? "normalized float"
-                                                    : "unnormalized float")
-                                             : "integer",
-                     get_explicit_type_name(outputType));
+            log_info( "read_image (%s coords, %s results) *****************************\n",
+                     flipFlop[ floatCoordIdx ] ? ( imageSampler->normalized_coords ? "normalized float" : "unnormalized float" ) : "integer",
+                     get_explicit_type_name( outputType ) );
-            for (unsigned int i = 0; i < formatList.size(); i++)
+            for( unsigned int i = 0; i < numFormats; i++ )
-                if (filterFlags[i]) continue;
+                if( filterFlags[i] )
+                    continue;
-                const cl_image_format &imageFormat = formatList[i];
+                cl_image_format &imageFormat = formatList[ i ];
-                ret |=
-                    test_read_image_type(device, context, queue, &imageFormat,
-                                         flipFlop[floatCoordIdx], imageSampler,
-                                         outputType, imageType);
+                ret |= test_read_image_type( device, context, queue, &imageFormat, flipFlop[ floatCoordIdx ], imageSampler, outputType, imageType );
@@ -230,74 +180,64 @@
-int test_image_set(cl_device_id device, cl_context context,
-                   cl_command_queue queue, test_format_set_fn formatTestFn,
-                   cl_mem_object_type imageType)
+int test_image_set( cl_device_id device, cl_context context, cl_command_queue queue, test_format_set_fn formatTestFn, cl_mem_object_type imageType )
     int ret = 0;
     static int printedFormatList = -1;
-    if ((imageType == CL_MEM_OBJECT_IMAGE3D)
-        && (formatTestFn == test_write_image_formats))
+    if ( ( 0 == is_extension_available( device, "cl_khr_3d_image_writes" )) && (imageType == CL_MEM_OBJECT_IMAGE3D) && (formatTestFn == test_write_image_formats) )
-        if (0 == is_extension_available(device, "cl_khr_3d_image_writes"))
+        gFailCount++;
+        log_error( "-----------------------------------------------------\n" );
+        log_error( "FAILED: test writing CL_MEM_OBJECT_IMAGE3D images\n" );
+        log_error( "This device does not support the mandated extension cl_khr_3d_image_writes.\n");
+        log_error( "-----------------------------------------------------\n\n" );
+        return -1;
+    }
+    if ( gTestMipmaps )
+    {
+        if ( 0 == is_extension_available( device, "cl_khr_mipmap_image" ))
-            log_info("-----------------------------------------------------\n");
-            log_info(
-                "This device does not support "
-                "cl_khr_3d_image_writes.\nSkipping 3d image write test. \n");
-            log_info(
-                "-----------------------------------------------------\n\n");
+            log_info( "-----------------------------------------------------\n" );
+            log_info( "This device does not support cl_khr_mipmap_image.\nSkipping mipmapped image test. \n" );
+            log_info( "-----------------------------------------------------\n\n" );
+            return 0;
+        }
+        if ( ( 0 == is_extension_available( device, "cl_khr_mipmap_image_writes" )) && (formatTestFn == test_write_image_formats))
+        {
+            log_info( "-----------------------------------------------------\n" );
+            log_info( "This device does not support cl_khr_mipmap_image_writes.\nSkipping mipmapped image write test. \n" );
+            log_info( "-----------------------------------------------------\n\n" );
             return 0;
-    if (gTestMipmaps)
-    {
-        if (0 == is_extension_available(device, "cl_khr_mipmap_image"))
-        {
-            log_info("-----------------------------------------------------\n");
-            log_info("This device does not support "
-                     "cl_khr_mipmap_image.\nSkipping mipmapped image test. \n");
-            log_info(
-                "-----------------------------------------------------\n\n");
-            return 0;
-        }
-        if ((0 == is_extension_available(device, "cl_khr_mipmap_image_writes"))
-            && (formatTestFn == test_write_image_formats))
-        {
-            log_info("-----------------------------------------------------\n");
-            log_info("This device does not support "
-                     "cl_khr_mipmap_image_writes.\nSkipping mipmapped image "
-                     "write test. \n");
-            log_info(
-                "-----------------------------------------------------\n\n");
-            return 0;
-        }
+    int version_check = (get_device_cl_version(device) < Version(1,2));
+    if (version_check != 0) {
+      switch (imageType) {
+        case CL_MEM_OBJECT_IMAGE1D:
+          test_missing_feature(version_check, "image_1D");
+          test_missing_feature(version_check, "image_1D_array");
+          test_missing_feature(version_check, "image_2D_array");
+      }
-    int version_check = (get_device_cl_version(device) < Version(1, 2));
-    if (version_check != 0)
-    {
-        switch (imageType)
-        {
-            case CL_MEM_OBJECT_IMAGE1D:
-                test_missing_feature(version_check, "image_1D");
-            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-                test_missing_feature(version_check, "image_1D_array");
-            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
-                test_missing_feature(version_check, "image_2D_array");
-        }
-    }
+    // Grab the list of supported image formats for integer reads
+    cl_image_format *formatList;
+    bool *filterFlags;
+    unsigned int numFormats;
     // This flag is only for querying the list of supported formats
     // The flag for creating image will be set explicitly in test functions
     cl_mem_flags flags;
     const char *flagNames;
-    if (formatTestFn == test_read_image_formats)
+    if( formatTestFn == test_read_image_formats )
-        if (gtestTypesToRun & kReadTests)
+        if(gtestTypesToRun & kReadTests)
             flags = CL_MEM_READ_ONLY;
             flagNames = "read";
@@ -310,7 +250,7 @@
-        if (gtestTypesToRun & kWriteTests)
+        if(gtestTypesToRun & kWriteTests)
             flags = CL_MEM_WRITE_ONLY;
             flagNames = "write";
@@ -322,30 +262,33 @@
-    // Grab the list of supported image formats for integer reads
-    std::vector<cl_image_format> formatList;
-    if (get_format_list(context, imageType, formatList, flags)) return -1;
+    if( get_format_list( context, imageType, formatList, numFormats, flags ) )
+        return -1;
+    BufferOwningPtr<cl_image_format> formatListBuf(formatList);
-    // First time through, we'll go ahead and print the formats supported,
-    // regardless of type
-    int test = imageType
-        | (formatTestFn == test_read_image_formats ? (1 << 16) : (1 << 17));
-    if (printedFormatList != test)
+    filterFlags = new bool[ numFormats ];
+    if( filterFlags == NULL )
-        log_info("---- Supported %s %s formats for this device ---- \n",
-                 convert_image_type_to_string(imageType), flagNames);
-        for (unsigned int f = 0; f < formatList.size(); f++)
+        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
+        return -1;
+    }
+    BufferOwningPtr<bool> filterFlagsBuf(filterFlags);
+    memset( filterFlags, 0, sizeof( bool ) * numFormats );
+    // First time through, we'll go ahead and print the formats supported, regardless of type
+    int test = imageType | (formatTestFn == test_read_image_formats ? (1 << 16) : (1 << 17));
+    if( printedFormatList != test )
+    {
+        log_info( "---- Supported %s %s formats for this device ---- \n", convert_image_type_to_string(imageType), flagNames );
+        for( unsigned int f = 0; f < numFormats; f++ )
-            if (IsChannelOrderSupported(formatList[f].image_channel_order)
-                && IsChannelTypeSupported(
-                    formatList[f].image_channel_data_type))
-                log_info(
-                    "  %-7s %-24s %d\n",
-                    GetChannelOrderName(formatList[f].image_channel_order),
-                    GetChannelTypeName(formatList[f].image_channel_data_type),
-                    (int)get_format_channel_count(&formatList[f]));
+            if ( IsChannelOrderSupported( formatList[ f ].image_channel_order ) && IsChannelTypeSupported( formatList[ f ].image_channel_data_type ) )
+                log_info( "  %-7s %-24s %d\n", GetChannelOrderName( formatList[ f ].image_channel_order ),
+                        GetChannelTypeName( formatList[ f ].image_channel_data_type ),
+                        (int)get_format_channel_count( &formatList[ f ] ) );
-        log_info("------------------------------------------- \n");
+        log_info( "------------------------------------------- \n" );
         printedFormatList = test;
@@ -355,9 +298,8 @@
         if (gTypesToTest & test.type)
-            std::vector<bool> filterFlags(formatList.size(), false);
-            if (filter_formats(formatList, filterFlags, test.channelTypes,
-                               gTestMipmaps)
+            if (filter_formats(formatList, filterFlags, numFormats,
+                               test.channelTypes, gTestMipmaps)
                 == 0)
                 log_info("No formats supported for %s type\n",;
@@ -366,7 +308,7 @@
                 imageSampler.filter_mode = CL_FILTER_NEAREST;
                 ret += formatTestFn(device, context, queue, formatList,
-                                    filterFlags, &imageSampler,
+                                    filterFlags, numFormats, &imageSampler,
                                     test.explicitType, imageType);
                 // Linear filtering is only supported with floats
@@ -374,7 +316,7 @@
                     imageSampler.filter_mode = CL_FILTER_LINEAR;
                     ret += formatTestFn(device, context, queue, formatList,
-                                        filterFlags, &imageSampler,
+                                        filterFlags, numFormats, &imageSampler,
                                         test.explicitType, imageType);
diff --git a/test_conformance/images/kernel_read_write/test_read_1D.cpp b/test_conformance/images/kernel_read_write/test_read_1D.cpp
index c9ba4e8..e2e36a6 100644
--- a/test_conformance/images/kernel_read_write/test_read_1D.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp
@@ -1,5 +1,5 @@
-// Copyright (c) 2017, 2021 The Khronos Group Inc.
+// Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -23,6 +23,21 @@
     #include <setjmp.h>
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern bool            gUseKernelSamplers;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+extern cl_mem_flags gMemFlagsToUse;
+extern int gtestTypesToRun;
+extern bool gDeviceLt20;
+#define MAX_TRIES               1
+#define MAX_CLAMPED             1
 const char *read1DKernelSourcePattern =
 "__kernel void sample_kernel( read_only image1d_t input,%s __global float *xOffsets, __global %s4 *results %s)\n"
@@ -51,6 +66,8 @@
 static const char *samplerKernelArg = " sampler_t imageSampler,";
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                            int x, int y, int z, float *outData );
 template <class T> int determine_validation_error_1D( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
                                                 T *resultPtr, T * expected, float error,
                                 float x, float xAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod )
@@ -167,6 +184,8 @@
     return 0;
+#define CLAMP( _val, _min, _max )           ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val))
 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float xfract, int normalized_coords, MTdata d, int lod)
     size_t i = 0;
@@ -209,6 +228,11 @@
+#ifndef MAX
+    #define MAX( _a, _b )           ((_a) > (_b) ? (_a) : (_b))
 int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel kernel,
                         image_descriptor *imageInfo, image_sampler_data *imageSampler,
                        bool useFloatCoords, ExplicitType outputType, MTdata d )
@@ -370,14 +394,9 @@
     if( gDebugTrace )
         log_info( " - Creating kernel arguments...\n" );
-    xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width,
-                              xOffsetValues, &error);
+    xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width, xOffsetValues, &error );
     test_error( error, "Unable to create x offset buffer" );
-    results = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                             get_explicit_type_size(outputType) * 4
-                                 * imageInfo->width,
-                             NULL, &error);
+    results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( outputType ) * 4 * imageInfo->width, NULL, &error );
     test_error( error, "Unable to create result buffer" );
     // Create sampler to use
@@ -979,11 +998,8 @@
     return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
-int test_read_image_set_1D(cl_device_id device, cl_context context,
-                           cl_command_queue queue,
-                           const cl_image_format *format,
-                           image_sampler_data *imageSampler, bool floatCoords,
-                           ExplicitType outputType)
+int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                        bool floatCoords, ExplicitType outputType )
     char programSrc[10240];
     const char *ptr;
@@ -1052,8 +1068,7 @@
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
diff --git a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
index b3287de..eede817 100644
--- a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
@@ -1,5 +1,5 @@
-// Copyright (c) 2017, 2021 The Khronos Group Inc.
+// Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -22,6 +22,20 @@
 #include <setjmp.h>
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool            gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern bool            gUseKernelSamplers;
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+extern cl_mem_flags gMemFlagsToUse;
+extern int gtestTypesToRun;
+extern bool gDeviceLt20;
+#define MAX_TRIES               1
+#define MAX_CLAMPED             1
 const char *read1DArrayKernelSourcePattern =
 "__kernel void sample_kernel( read_only image1d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global %s4 *results %s)\n"
@@ -59,6 +73,12 @@
 static const char *samplerKernelArg = " sampler_t imageSampler,";
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                   int x, int y, int z, float *outData );
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                   int x, int y, int z, float *outData , int lod);
 template <class T> int determine_validation_error_1D_arr( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
                                                   T *resultPtr, T * expected, float error,
                                                   float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod )
@@ -198,6 +218,8 @@
     return 0;
+#define CLAMP( _val, _min, _max )           ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val))
 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d , int lod)
     size_t i = 0;
@@ -255,6 +277,11 @@
+#ifndef MAX
+#define MAX( _a, _b )           ((_a) > (_b) ? (_a) : (_b))
 int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_kernel kernel,
                              image_descriptor *imageInfo, image_sampler_data *imageSampler,
                              bool useFloatCoords, ExplicitType outputType, MTdata d )
@@ -441,22 +468,16 @@
     if( gDebugTrace )
         log_info( " - Creating kernel arguments...\n" );
-    xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->arraySize,
-                              xOffsetValues, &error);
+    xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
+        sizeof( cl_float ) * imageInfo->width * imageInfo->arraySize, xOffsetValues, &error );
     test_error( error, "Unable to create x offset buffer" );
-    yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->arraySize,
-                              yOffsetValues, &error);
+    yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
+        sizeof( cl_float ) * imageInfo->width * imageInfo->arraySize, yOffsetValues, &error );
     test_error( error, "Unable to create y offset buffer" );
-    results = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                             get_explicit_type_size(outputType) * 4
-                                 * imageInfo->width * imageInfo->arraySize,
-                             NULL, &error);
+    results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),
+        get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->arraySize, NULL, &error );
     test_error( error, "Unable to create result buffer" );
     // Create sampler to use
@@ -1085,11 +1106,8 @@
     return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
-int test_read_image_set_1D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 const cl_image_format *format,
-                                 image_sampler_data *imageSampler,
-                                 bool floatCoords, ExplicitType outputType)
+int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                                 bool floatCoords, ExplicitType outputType )
     char programSrc[10240];
     const char *ptr;
@@ -1158,8 +1176,7 @@
             gTestMipmaps ? ", lod" : "" );
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
     if( gTestSmallImages )
diff --git a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
index 7cb334b..79420b4 100644
--- a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
@@ -1,5 +1,5 @@
-// Copyright (c) 2017, 2021 The Khronos Group Inc.
+// Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -16,6 +16,19 @@
 #include "test_common.h"
 #include <float.h>
+#define MAX_ERR 0.005f // float constant; NOTE(review): its use is outside this hunk - by name an error threshold, confirm
+#define MAX_HALF_LINEAR_ERR 0.3f // float constant; NOTE(review): its use is outside this hunk - confirm what it bounds
+extern bool         gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps; // test-wide boolean switches declared here and defined elsewhere
+extern bool         gUseKernelSamplers;
+extern cl_filter_mode   gFilterModeToUse;
+extern cl_addressing_mode   gAddressModeToUse;
+extern cl_mem_flags gMemFlagsToUse;
+extern int gtestTypesToRun;
+extern bool gDeviceLt20; // when false, the kernel in this file is built with "-cl-std=CL2.0"; when true, with empty build options
+#define MAX_TRIES               1 // compared against numTries when a test in this file computes its return value
+#define MAX_CLAMPED             1 // compared against numClamped when a test in this file computes its return value
 // Utility function to clamp down image sizes for certain tests to avoid
 // using too much memory.
 static size_t reduceImageSizeRange(size_t maxDimSize) {
@@ -75,6 +88,7 @@
 static const char *samplerKernelArg = " sampler_t imageSampler,";
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, int x, int y, int z, float *outData ); // forward declaration only; NOTE(review): call sites are outside this hunk
 template <class T> int determine_validation_error_offset_2D_array( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
                                                          T *resultPtr, T * expected, float error,
                                                          float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod )
@@ -216,6 +230,8 @@
     return 0;
+#define CLAMP( _val, _min, _max )           ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val)) // yields _min if _val < _min, _max if _val > _max, else _val; arguments can be evaluated more than once, so pass side-effect-free expressions
 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float *zOffsets, float xfract, float yfract, float zfract, int normalized_coords, MTdata d , int lod)
     size_t i = 0;
@@ -292,6 +308,10 @@
+#ifndef MAX // define MAX only when no earlier definition is in effect
+#define MAX(_a, _b)             ((_a) > (_b) ? (_a) : (_b)) // larger of the two arguments; each argument can be evaluated twice
 int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_kernel kernel,
                        image_descriptor *imageInfo, image_sampler_data *imageSampler,
                        bool useFloatCoords, ExplicitType outputType, MTdata d )
@@ -458,26 +478,13 @@
-    xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->height * imageInfo->arraySize,
-                              xOffsetValues, &error);
+    xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->arraySize, xOffsetValues, &error );
     test_error( error, "Unable to create x offset buffer" );
-    yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->height * imageInfo->arraySize,
-                              yOffsetValues, &error);
+    yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->arraySize, yOffsetValues, &error );
     test_error( error, "Unable to create y offset buffer" );
-    zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->height * imageInfo->arraySize,
-                              zOffsetValues, &error);
+    zOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->arraySize, zOffsetValues, &error );
     test_error( error, "Unable to create y offset buffer" );
-    results =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       get_explicit_type_size(outputType) * 4 * imageInfo->width
-                           * imageInfo->height * imageInfo->arraySize,
-                       NULL, &error);
+    results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->height * imageInfo->arraySize, NULL, &error );
     test_error( error, "Unable to create result buffer" );
     // Create sampler to use
@@ -1297,11 +1304,8 @@
     return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
-int test_read_image_set_2D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 const cl_image_format *format,
-                                 image_sampler_data *imageSampler,
-                                 bool floatCoords, ExplicitType outputType)
+int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                           bool floatCoords, ExplicitType outputType )
     char programSrc[10240];
     const char *ptr;
@@ -1390,8 +1394,7 @@
             gTestMipmaps ? ", lod" : " " );
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
     // Run tests
diff --git a/test_conformance/images/kernel_read_write/test_read_3D.cpp b/test_conformance/images/kernel_read_write/test_read_3D.cpp
index 860114f..0b9e8de 100644
--- a/test_conformance/images/kernel_read_write/test_read_3D.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_3D.cpp
@@ -1,5 +1,5 @@
-// Copyright (c) 2017, 2021 The Khronos Group Inc.
+// Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -16,6 +16,21 @@
 #include "test_common.h"
 #include <float.h>
+#define MAX_ERR 0.005f // float constant; NOTE(review): its use is not visible in this part of the file - by name an error threshold, confirm
+#define MAX_HALF_LINEAR_ERR 0.3f // float constant; NOTE(review): its use is not visible in this part of the file - confirm what it bounds
+extern bool            gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps; // test-wide switches; e.g. gDisableOffsets and gTestMipmaps steer InitFloatCoords, gDebugTrace gates log_info tracing
+extern bool            gUseKernelSamplers; // when false, test_read_image_3D passes the sampler object as a kernel argument
+extern cl_filter_mode    gFilterModeToUse;
+extern cl_addressing_mode    gAddressModeToUse;
+extern cl_mem_flags gMemFlagsToUse; // compared against CL_MEM_USE_HOST_PTR / CL_MEM_COPY_HOST_PTR to choose how the test image is created
+extern int gtestTypesToRun; // tested with '& kReadTests' to pick read-only vs read-write image flags
+extern bool gDeviceLt20; // NOTE(review): no use of this flag is visible in this part of the file
+#define MAX_TRIES               1 // initial value of numTries in test_read_image_3D
+#define MAX_CLAMPED             1 // initial value of numClamped in test_read_image_3D
 // Utility function to clamp down image sizes for certain tests to avoid
 // using too much memory.
 static size_t reduceImageSizeRange(size_t maxDimSize, RandomSeed& seed) {
@@ -73,12 +88,1083 @@
 static const char *samplerKernelArg = " sampler_t imageSampler,";
+extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, int x, int y, int z, float *outData ); // forward declaration; called below to print the texels around a failing sample
+template <class T> int determine_validation_error_offset( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler, // Reports a sample that "did not validate": classifies it as a clamping problem or a generic mismatch, logs it, and returns -1 once the matching counter reaches 0 (otherwise 0)
+                                                         T *resultPtr, T * expected, float error, // resultPtr / expected: 4-component values that are printed; error: echoed in the float-format messages
+                                                         float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod ) // j: sample index shown in the log; numTries / numClamped: in/out counters decremented here
+    int actualX, actualY, actualZ;
+    int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, &actualZ, lod ); // non-zero when the result value is located in the image; a value > 1 is reported below as "NOT unique!!"
+    bool clampingErr = false, clamped = false, otherClampingBug = false;
+    int clampedX, clampedY, clampedZ;
-int test_read_image_set_3D(cl_device_id device, cl_context context,
-                           cl_command_queue queue,
-                           const cl_image_format *format,
-                           image_sampler_data *imageSampler, bool floatCoords,
-                           ExplicitType outputType)
+    size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height, imageDepth = imageInfo->depth;
+    clamped = get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, imageWidth, imageHeight, imageDepth, imageSampler, imageInfo, clampedX, clampedY, clampedZ ); // NOTE(review): expected to fill clampedX/Y/Z (they are not assigned anywhere else before being read) - confirm against its declaration
+    if( found )
+    {
+        // Is it a clamping bug?
+        if( clamped && clampedX == actualX && clampedY == actualY && clampedZ == actualZ )
+        {
+            if( (--numClamped) == 0 ) // clamping counter used up: print the details and fail
+            {
+                if( printAsFloat )
+                {
+                    log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%g,%g,%g,%g), got (%g,%g,%g,%g), error of %g\n", // NOTE(review): j is a size_t but is printed with %ld here and in every message below - confirm this is correct on all targets
+                              j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                              (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+                }
+                else
+                {
+                    log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n",
+                              j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                              (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+                }
+                log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates!\n" );
+                return -1;
+            }
+            clampingErr = true; // recorded as a clamping failure; skips the generic report further down
+            otherClampingBug = true; // also skips the border-color check that follows
+        }
+    }
+    if( clamped && !otherClampingBug )
+    {
+        // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously
+        if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
+        {
+            if( (--numClamped) == 0 ) // same counter as the clamping check above
+            {
+                if( printAsFloat )
+                {
+                    log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%g,%g,%g,%g), got (%g,%g,%g,%g), error of %g\n",
+                              j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                              (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+                }
+                else
+                {
+                    log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n",
+                              j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                              (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+                }
+                log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color!\n" );
+                return -1;
+            }
+            clampingErr = true;
+        }
+    }
+    if( !clampingErr ) // neither clamping diagnosis applied: emit the generic mismatch report
+    {
+        /*        if( clamped && ( (int)x + (int)xOffsetValues[ j ] < 0 || (int)y + (int)yOffsetValues[ j ] < 0 ) )
+         {
+         log_error( "NEGATIVE COORDINATE ERROR\n" );
+         return -1;
+         }
+         */
+        if( true ) // gExtraValidateInfo )
+        {
+            if( printAsFloat )
+            {
+                log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\t     got (%g,%g,%g,%g), error of %g\n",
+                          j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
+                          (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
+            }
+            else
+            {
+                log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\t     got (%x,%x,%x,%x)\n",
+                          j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
+                          (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
+            }
+            log_error( "Integer coords resolve to %d,%d,%d   with img size %d,%d,%d\n", clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight, (int)imageDepth );
+            if( printAsFloat && gExtraValidateInfo ) // optional dump of the 3x3x3 texel neighbourhood around the resolved coordinate
+            {
+                log_error( "\nNearby values:\n" );
+                for( int zOff = -1; zOff <= 1; zOff++ )
+                {
+                    for( int yOff = -1; yOff <= 1; yOff++ )
+                    {
+                        float top[ 4 ], real[ 4 ], bot[ 4 ]; // texels at clampedX - 1, clampedX and clampedX + 1 respectively
+                        read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 , clampedY + yOff, clampedZ + zOff, top );
+                        read_image_pixel_float( imagePtr, imageInfo, clampedX ,clampedY + yOff, clampedZ + zOff, real );
+                        read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, clampedZ + zOff, bot );
+                        log_error( "\t(%g,%g,%g,%g)",top[0], top[1], top[2], top[3] );
+                        log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
+                        log_error( " (%g,%g,%g,%g)\n",bot[0], bot[1], bot[2], bot[3] );
+                    }
+                }
+            }
+            //        }
+            //        else
+            //            log_error( "\n" );
+            if( imageSampler->filter_mode != CL_FILTER_LINEAR ) // the "found in image" hint is printed only for non-linear filtering
+            {
+                if( found )
+                    log_error( "\tValue really found in image at %d,%d,%d (%s)\n", actualX, actualY, actualZ, ( found > 1 ) ? "NOT unique!!" : "unique" );
+                else
+                    log_error( "\tValue not actually found in image\n" );
+            }
+            log_error( "\n" );
+        }
+        numClamped = -1; // We force the clamped counter to never work
+        if( ( --numTries ) == 0 ) // generic-failure counter used up
+            return -1;
+    }
+    return 0; // no counter reached zero on this call
+#define CLAMP( _val, _min, _max )           ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val)) // yields _min if _val < _min, _max if _val > _max, else _val; arguments can be evaluated more than once
+static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float *zOffsets, float xfract, float yfract, float zfract, int normalized_coords, MTdata d , int lod) // Fills xOffsets / yOffsets / zOffsets with one coordinate triple per texel, visiting z outermost, then y, then x
+    size_t i = 0; // running index shared by the three offset arrays
+    if( gDisableOffsets ) // no jitter: coordinate = texel index + fractional part
+    {
+        for( size_t z = 0; z < imageInfo->depth; z++ )
+        {
+            for( size_t y = 0; y < imageInfo->height; y++ )
+            {
+                for( size_t x = 0; x < imageInfo->width; x++, i++ )
+                {
+                    xOffsets[ i ] = (float) (xfract + (double) x);
+                    yOffsets[ i ] = (float) (yfract + (double) y);
+                    zOffsets[ i ] = (float) (zfract + (double) z);
+                }
+            }
+        }
+    }
+    else // jitter: each texel index is shifted by random_in_range( -10, 10, d ) before the fraction is added
+    {
+        for( size_t z = 0; z < imageInfo->depth; z++ )
+        {
+            for( size_t y = 0; y < imageInfo->height; y++ )
+            {
+                for( size_t x = 0; x < imageInfo->width; x++, i++ )
+                {
+                    xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
+                    yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
+                    zOffsets[ i ] = (float) (zfract + (double) ((int) z + random_in_range( -10, 10, d )));
+                }
+            }
+        }
+    }
+    if( imageSampler->addressing_mode == CL_ADDRESS_NONE ) // for this addressing mode every coordinate is clamped into [0, dimension - 1]
+    {
+        i = 0;
+        for( size_t z = 0; z < imageInfo->depth; z++ )
+        {
+            for( size_t y = 0; y < imageInfo->height; y++ )
+            {
+                for( size_t x = 0; x < imageInfo->width; x++, i++ )
+                {
+                    xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) imageInfo->width - 1.0);
+                    yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double) imageInfo->height - 1.0);
+                    zOffsets[ i ] = (float) CLAMP( (double) zOffsets[ i ], 0.0, (double) imageInfo->depth - 1.0);
+                }
+            }
+        }
+    }
+    if( normalized_coords || gTestMipmaps) // convert to normalized coordinates by dividing by the image dimensions
+    {
+        i = 0;
+        if (lod == 0) // level 0: divide by the full image dimensions
+        {
+            for( size_t z = 0; z < imageInfo->depth; z++ )
+            {
+                for( size_t y = 0; y < imageInfo->height; y++ )
+                {
+                    for( size_t x = 0; x < imageInfo->width; x++, i++ )
+                    {
+                        xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) imageInfo->width);
+                        yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) imageInfo->height);
+                        zOffsets[ i ] = (float) ((double) zOffsets[ i ] / (double) imageInfo->depth);
+                    }
+                }
+            }
+        }
+        else if (gTestMipmaps) // for lod != 0 the division happens only when mipmap testing is enabled
+        {
+            size_t width_lod, height_lod, depth_lod;
+            width_lod = (imageInfo->width >> lod)?(imageInfo->width >> lod):1; // dimension shifted right by lod, with a minimum of 1
+            height_lod = (imageInfo->height >> lod)?(imageInfo->height >> lod):1;
+            depth_lod = (imageInfo->depth >> lod)?(imageInfo->depth >> lod):1;
+            for( size_t z = 0; z < depth_lod; z++ ) // only the first depth_lod * height_lod * width_lod entries are rescaled
+            {
+                for( size_t y = 0; y < height_lod; y++ )
+                {
+                    for( size_t x = 0; x < width_lod; x++, i++ )
+                    {
+                        xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) width_lod);
+                        yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) height_lod);
+                        zOffsets[ i ] = (float) ((double) zOffsets[ i ] / (double) depth_lod);
+                    }
+                }
+            }
+        }
+    }
+#ifndef MAX // define MAX only when no earlier definition is in effect
+#define MAX(_a, _b)             ((_a) > (_b) ? (_a) : (_b)) // larger of the two arguments; each argument can be evaluated twice
+int test_read_image_3D( cl_context context, cl_command_queue queue, cl_kernel kernel,
+                       image_descriptor *imageInfo, image_sampler_data *imageSampler,
+                       bool useFloatCoords, ExplicitType outputType, MTdata d )
+    int error;
+    size_t threads[3];
+    static int initHalf = 0;
+    cl_mem_flags    image_read_write_flags = CL_MEM_READ_ONLY;
+    clMemWrapper xOffsets, yOffsets, zOffsets, results;
+    clSamplerWrapper actualSampler;
+    BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
+    // Create offset data
+    BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->depth));
+    BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->depth));
+    BufferOwningPtr<cl_float> zOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->depth));
+    if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+        if( DetectFloatToHalfRoundingMode(queue) )
+            return 1;
+    BufferOwningPtr<char> imageValues;
+    generate_random_image_data( imageInfo, imageValues, d );
+    // Construct testing sources
+    clProtectedImage protImage;
+    clMemWrapper unprotImage;
+    cl_mem image;
+    if(gtestTypesToRun & kReadTests)
+    {
+        image_read_write_flags = CL_MEM_READ_ONLY;
+    }
+    else
+    {
+        image_read_write_flags = CL_MEM_READ_WRITE;
+    }
+    if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
+    {
+        // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
+        // Do not use protected images for max image size test since it rounds the row size to a page size
+        if (gTestMaxImages) {
+            generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
+            unprotImage = create_image_3d(  context,
+                                            image_read_write_flags | CL_MEM_USE_HOST_PTR,
+                                            imageInfo->format,
+                                            imageInfo->width,
+                                            imageInfo->height,
+                                            imageInfo->depth,
+                                            ( gEnablePitch ? imageInfo->rowPitch : 0 ),
+                                            ( gEnablePitch ? imageInfo->slicePitch : 0 ), maxImageUseHostPtrBackingStore, &error );
+        } else {
+            error = protImage.Create( context,
+                                    (cl_mem_flags)(image_read_write_flags),
+                                    imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->depth );
+        }
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
+            return error;
+        }
+        if (gTestMaxImages)
+            image = (cl_mem)unprotImage;
+        else
+            image = (cl_mem)protImage;
+    }
+    else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
+    {
+        // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
+        unprotImage = create_image_3d( context,
+                                      image_read_write_flags | CL_MEM_COPY_HOST_PTR,
+                                      imageInfo->format,
+                                      imageInfo->width,
+                                      imageInfo->height,
+                                      imageInfo->depth,
+                                      ( gEnablePitch ? imageInfo->rowPitch : 0 ),
+                                      ( gEnablePitch ? imageInfo->slicePitch : 0 ),
+                                      imageValues, &error );
+        if( error != CL_SUCCESS )
+        {
+            log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
+            return error;
+        }
+        image = unprotImage;
+    }
+    else // Either CL_MEM_ALLOC_HOST_PTR or none
+    {
+        // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
+        // it works just as if no flag is specified, so we just do the same thing either way
+        if ( !gTestMipmaps )
+        {
+            unprotImage = create_image_3d( context,
+                                          image_read_write_flags | gMemFlagsToUse,
+                                          imageInfo->format,
+                                          imageInfo->width, imageInfo->height, imageInfo->depth,
+                                          ( gEnablePitch ? imageInfo->rowPitch : 0 ),
+                                          ( gEnablePitch ? imageInfo->slicePitch : 0 ),
+                                          imageValues, &error );
+            if( error != CL_SUCCESS )
+            {
+                log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
+                return error;
+            }
+            image = unprotImage;
+        }
+        else
+        {
+            cl_image_desc image_desc = {0};
+            image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+            image_desc.image_width = imageInfo->width;
+            image_desc.image_height = imageInfo->height;
+            image_desc.image_depth = imageInfo->depth;
+            image_desc.num_mip_levels = imageInfo->num_mip_levels;
+            unprotImage = clCreateImage( context,
+                                        image_read_write_flags,
+                                        imageInfo->format, &image_desc, NULL, &error);
+            if( error != CL_SUCCESS )
+            {
+                log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %d x %d x %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
+                return error;
+            }
+            image = unprotImage;
+        }
+    }
+    if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
+    {
+        size_t origin[ 4 ] = { 0, 0, 0, 0};
+        size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->depth };
+        if( gDebugTrace )
+            log_info( " - Writing image...\n" );
+        if ( !gTestMipmaps )
+        {
+            error = clEnqueueWriteImage(queue, image, CL_TRUE,
+                                        origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0,
+                                        imageValues , 0, NULL, NULL);
+            if (error != CL_SUCCESS)
+            {
+                log_error( "ERROR: Unable to write to 3D image of size %d x %d x %d \n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth );
+                return error;
+            }
+        }
+        else
+        {
+            int nextLevelOffset = 0;
+            for (int i =0; i < imageInfo->num_mip_levels; i++)
+            {   origin[3] = i;
+                error = clEnqueueWriteImage(queue, image, CL_TRUE,
+                                            origin, region, /*gEnablePitch ? imageInfo->rowPitch :*/ 0, /*gEnablePitch ? imageInfo->slicePitch :*/ 0,
+                                            ((char*)imageValues + nextLevelOffset), 0, NULL, NULL);
+                if (error != CL_SUCCESS)
+                {
+                    log_error( "ERROR: Unable to write to %d level mipmapped 3D image of size %d x %d x %d\n", (int)imageInfo->num_mip_levels,(int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth );
+                    return error;
+                }
+                nextLevelOffset += region[0]*region[1]*region[2]*get_pixel_size(imageInfo->format);
+                //Subsequent mip level dimensions keep halving
+                region[0] = region[0] >> 1 ? region[0] >> 1 : 1;
+                region[1] = region[1] >> 1 ? region[1] >> 1 : 1;
+                region[2] = region[2] >> 1 ? region[2] >> 1 : 1;
+            }
+        }
+    }
+    xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->depth, xOffsetValues, &error );
+    test_error( error, "Unable to create x offset buffer" );
+    yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->depth, yOffsetValues, &error );
+    test_error( error, "Unable to create y offset buffer" );
+    zOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->depth, zOffsetValues, &error );
+    test_error( error, "Unable to create y offset buffer" );
+    results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->height * imageInfo->depth, NULL, &error );
+    test_error( error, "Unable to create result buffer" );
+    // Create sampler to use
+    actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
+    test_error(error, "Unable to create image sampler");
+    // Set arguments
+    int idx = 0;
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
+    test_error( error, "Unable to set kernel arguments" );
+    if( !gUseKernelSamplers )
+    {
+        error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
+        test_error( error, "Unable to set kernel arguments" );
+    }
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &zOffsets );
+    test_error( error, "Unable to set kernel arguments" );
+    error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
+    test_error( error, "Unable to set kernel arguments" );
+    const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
+    int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
+    int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
+    int loopCount = 2 * float_offset_count;
+    if( ! useFloatCoords )
+        loopCount = 1;
+    if (gTestMaxImages) {
+        loopCount = 1;
+        log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
+    }
+    // Get the maximum absolute error for this format
+    double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler);
+    if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
+    if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
+        initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
+        if (initHalf) {
+            log_info("Half rounding mode successfully detected.\n");
+        }
+    }
+    int nextLevelOffset = 0;
+    size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth;
+    //Loop over all mipmap levels, if we are testing mipmapped images.
+    for(int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++)
+    {
+        size_t resultValuesSize = width_lod * height_lod * depth_lod * get_explicit_type_size( outputType ) * 4;
+        BufferOwningPtr<char> resultValues(malloc( resultValuesSize ));
+        float lod_float = (float)lod;
+        if (gTestMipmaps) {
+            //Set the lod kernel arg
+            if(gDebugTrace)
+                log_info(" - Working at mip level %d\n", lod);
+            error = clSetKernelArg( kernel, idx, sizeof( float ), &lod_float);
+            test_error( error, "Unable to set kernel arguments" );
+        }
+    for( int q = 0; q < loopCount; q++ )
+    {
+        float offset = float_offsets[ q % float_offset_count ];
+        // Init the coordinates
+        InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues, zOffsetValues,
+                        q>=float_offset_count ? -offset: offset,
+                        q>=float_offset_count ? offset: -offset,
+                        q>=float_offset_count ? -offset: offset,
+                        imageSampler->normalized_coords, d, lod );
+        error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->depth, xOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write x offsets" );
+        error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->depth, yOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write y offsets" );
+        error = clEnqueueWriteBuffer( queue, zOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->depth, zOffsetValues, 0, NULL, NULL );
+        test_error( error, "Unable to write z offsets" );
+        memset( resultValues, 0xff, resultValuesSize );
+        clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
+        // Figure out thread dimensions
+        threads[0] = (size_t)width_lod;
+        threads[1] = (size_t)height_lod;
+        threads[2] = (size_t)depth_lod;
+        // Run the kernel
+        error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL );
+        test_error( error, "Unable to run kernel" );
+        // Get results
+        error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * depth_lod * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL );
+        test_error( error, "Unable to read results from kernel" );
+        if( gDebugTrace )
+            log_info( "    results read\n" );
+        // Validate results element by element
+        char *imagePtr = (char*)imageValues + nextLevelOffset;
+        /*
+         * FLOAT output type, sRGB channel orders
+         */
+        if(is_sRGBA_order(imageInfo->format->image_channel_order) && (outputType == kFloat) )
+        {
+            // Validate float results
+            float *resultPtr = (float *)(char *)resultValues;
+            float expected[4], error=0.0f;
+            float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
+            for( size_t z = 0, j = 0; z < depth_lod; z++ )
+            {
+                for( size_t y = 0; y < height_lod; y++ )
+                {
+                    for( size_t x = 0; x < width_lod; x++, j++ )
+                    {
+                        // Step 1: go through and see if the results verify for the pixel
+                        // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        int checkOnlyOnePixel = 0;
+                        int found_pixel = 0;
+                        float offset = NORM_OFFSET;
+                        if (!imageSampler->normalized_coords ||  imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+#if defined( __APPLE__ )
+                            // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
+                            || gDeviceType != CL_DEVICE_TYPE_GPU
+                            )
+                            offset = 0.0f;          // Loop only once
+                        for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) {
+                                for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) {
+                                    int hasDenormals = 0;
+                                    FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                          xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                          norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                          imageSampler, expected, 0, &hasDenormals, lod );
+                                    float err1 =
+                                        ABS_ERROR(sRGBmap(resultPtr[0]),
+                                                  sRGBmap(expected[0]));
+                                    float err2 =
+                                        ABS_ERROR(sRGBmap(resultPtr[1]),
+                                                  sRGBmap(expected[1]));
+                                    float err3 =
+                                        ABS_ERROR(sRGBmap(resultPtr[2]),
+                                                  sRGBmap(expected[2]));
+                                    float err4 =
+                                        ABS_ERROR(resultPtr[3], expected[3]);
+                                    // Clamp to the minimum absolute error for the format
+                                    if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
+                                    if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
+                                    if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
+                                    if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
+                                    float maxErr = 0.5;
+                                    if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    || ! (err3 <= maxErr) || ! (err4 <= maxErr) )
+                                    {
+                                        // Try flushing the denormals
+                                        if( hasDenormals )
+                                        {
+                                            // If the implementation decides to flush subnormals to zero,
+                                            // the max error needs to be adjusted
+                                              maxErr += 4 * FLT_MIN;
+                                            maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                       xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                       norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                       imageSampler, expected, 0, NULL, lod );
+                                            err1 =
+                                                ABS_ERROR(sRGBmap(resultPtr[0]),
+                                                          sRGBmap(expected[0]));
+                                            err2 =
+                                                ABS_ERROR(sRGBmap(resultPtr[1]),
+                                                          sRGBmap(expected[1]));
+                                            err3 =
+                                                ABS_ERROR(sRGBmap(resultPtr[2]),
+                                                          sRGBmap(expected[2]));
+                                            err4 = ABS_ERROR(resultPtr[3],
+                                                             expected[3]);
+                                        }
+                                    }
+                                    found_pixel = (err1 <= maxErr) && (err2 <= maxErr)  && (err3 <= maxErr) && (err4 <= maxErr);
+                                }//norm_offset_z
+                            }//norm_offset_y
+                        }//norm_offset_x
+                        // Step 2: If we did not find a match, then print out debugging info.
+                        if (!found_pixel) {
+                            // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                            // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                            checkOnlyOnePixel = 0;
+                            int shouldReturn = 0;
+                            for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                    for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                        int hasDenormals = 0;
+                                        FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                              norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                              imageSampler, expected, 0, &hasDenormals, lod );
+                                        float err1 =
+                                            ABS_ERROR(sRGBmap(resultPtr[0]),
+                                                      sRGBmap(expected[0]));
+                                        float err2 =
+                                            ABS_ERROR(sRGBmap(resultPtr[1]),
+                                                      sRGBmap(expected[1]));
+                                        float err3 =
+                                            ABS_ERROR(sRGBmap(resultPtr[2]),
+                                                      sRGBmap(expected[2]));
+                                        float err4 = ABS_ERROR(resultPtr[3],
+                                                               expected[3]);
+                                        float maxErr = 0.6;
+                                        if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    || ! (err3 <= maxErr) || ! (err4 <= maxErr) )
+                                        {
+                                            // Try flushing the denormals
+                                            if( hasDenormals )
+                                            {
+                                                // If the implementation decides to flush subnormals to zero,
+                                                // the max error needs to be adjusted
+                                                  maxErr += 4 * FLT_MIN;
+                                                maxPixel = sample_image_pixel_float( imagePtr, imageInfo,
+                                                                                    xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                    imageSampler, expected, 0, NULL, lod );
+                                                err1 = ABS_ERROR(
+                                                    sRGBmap(resultPtr[0]),
+                                                    sRGBmap(expected[0]));
+                                                err2 = ABS_ERROR(
+                                                    sRGBmap(resultPtr[1]),
+                                                    sRGBmap(expected[1]));
+                                                err3 = ABS_ERROR(
+                                                    sRGBmap(resultPtr[2]),
+                                                    sRGBmap(expected[2]));
+                                                err4 = ABS_ERROR(resultPtr[3],
+                                                                 expected[3]);
+                                            }
+                                        }
+                                        if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    || ! (err3 <= maxErr) || ! (err4 <= maxErr) )
+                                        {
+                                            log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
+                                            float tempOut[4];
+                                            shouldReturn |= determine_validation_error_offset<float>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                     expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j],
+                                                                                                     norm_offset_x, norm_offset_y, norm_offset_z, j,
+                                                                                                     numTries, numClamped, true, lod );
+                                            log_error( "Step by step:\n" );
+                                            FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                              norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                              imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod);
+                                            log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",
+                                                      Ulp_Error( resultPtr[0], expected[0] ),
+                                                      Ulp_Error( resultPtr[1], expected[1] ),
+                                                      Ulp_Error( resultPtr[2], expected[2] ),
+                                                      Ulp_Error( resultPtr[3], expected[3] ),
+                                                      Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
+                                        } else {
+                                            log_error("Test error: we should have detected this passing above.\n");
+                                        }
+                                    }//norm_offset_z
+                                }//norm_offset_y
+                            }//norm_offset_x
+                            if( shouldReturn )
+                                return 1;
+                        } // if (!found_pixel)
+                        resultPtr += 4;
+                    }
+                }
+            }
+        }
+        /*
+         * FLOAT output type, non-sRGB channel orders
+         */
+        else if( outputType == kFloat )
+        {
+            // Validate float results
+            float *resultPtr = (float *)(char *)resultValues;
+            float expected[4], error=0.0f;
+            float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
+            for( size_t z = 0, j = 0; z < depth_lod; z++ )
+            {
+                for( size_t y = 0; y < height_lod; y++ )
+                {
+                    for( size_t x = 0; x < width_lod; x++, j++ )
+                    {
+                        // Step 1: go through and see if the results verify for the pixel
+                        // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        int checkOnlyOnePixel = 0;
+                        int found_pixel = 0;
+                        float offset = NORM_OFFSET;
+                        if (!imageSampler->normalized_coords ||  imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
+#if defined( __APPLE__ )
+                            // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
+                            || gDeviceType != CL_DEVICE_TYPE_GPU
+                            )
+                            offset = 0.0f;          // Loop only once
+                        for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) {
+                                for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) {
+                                    int hasDenormals = 0;
+                                    FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                          xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                          norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                          imageSampler, expected, 0, &hasDenormals, lod );
+                                    float err1 =
+                                        ABS_ERROR(resultPtr[0], expected[0]);
+                                    float err2 =
+                                        ABS_ERROR(resultPtr[1], expected[1]);
+                                    float err3 =
+                                        ABS_ERROR(resultPtr[2], expected[2]);
+                                    float err4 =
+                                        ABS_ERROR(resultPtr[3], expected[3]);
+                                    // Clamp to the minimum absolute error for the format
+                                    if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
+                                    if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
+                                    if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
+                                    if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
+                                    float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+                                    float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
+                                    float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
+                                    float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+                                    if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
+                                    {
+                                        // Try flushing the denormals
+                                        if( hasDenormals )
+                                        {
+                                            // If the implementation decides to flush subnormals to zero,
+                                            // the max error needs to be adjusted
+                                              maxErr1 += 4 * FLT_MIN;
+                                            maxErr2 += 4 * FLT_MIN;
+                                            maxErr3 += 4 * FLT_MIN;
+                                            maxErr4 += 4 * FLT_MIN;
+                                            maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                       xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                       norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                       imageSampler, expected, 0, NULL, lod );
+                                            err1 = ABS_ERROR(resultPtr[0],
+                                                             expected[0]);
+                                            err2 = ABS_ERROR(resultPtr[1],
+                                                             expected[1]);
+                                            err3 = ABS_ERROR(resultPtr[2],
+                                                             expected[2]);
+                                            err4 = ABS_ERROR(resultPtr[3],
+                                                             expected[3]);
+                                        }
+                                    }
+                                    found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2)  && (err3 <= maxErr3) && (err4 <= maxErr4);
+                                }//norm_offset_z
+                            }//norm_offset_y
+                        }//norm_offset_x
+                        // Step 2: If we did not find a match, then print out debugging info.
+                        if (!found_pixel) {
+                            // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                            // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                            checkOnlyOnePixel = 0;
+                            int shouldReturn = 0;
+                            for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                    for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                        int hasDenormals = 0;
+                                        FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                              norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                              imageSampler, expected, 0, &hasDenormals, lod );
+                                        float err1 = ABS_ERROR(resultPtr[0],
+                                                               expected[0]);
+                                        float err2 = ABS_ERROR(resultPtr[1],
+                                                               expected[1]);
+                                        float err3 = ABS_ERROR(resultPtr[2],
+                                                               expected[2]);
+                                        float err4 = ABS_ERROR(resultPtr[3],
+                                                               expected[3]);
+                                        float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
+                                        float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
+                                        float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
+                                        float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
+                                        if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
+                                        {
+                                            // Try flushing the denormals
+                                            if( hasDenormals )
+                                            {
+                                                  maxErr1 += 4 * FLT_MIN;
+                                                maxErr2 += 4 * FLT_MIN;
+                                                maxErr3 += 4 * FLT_MIN;
+                                                maxErr4 += 4 * FLT_MIN;
+                                                maxPixel = sample_image_pixel_float( imagePtr, imageInfo,
+                                                                                    xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                    imageSampler, expected, 0, NULL, lod );
+                                                err1 = ABS_ERROR(resultPtr[0],
+                                                                 expected[0]);
+                                                err2 = ABS_ERROR(resultPtr[1],
+                                                                 expected[1]);
+                                                err3 = ABS_ERROR(resultPtr[2],
+                                                                 expected[2]);
+                                                err4 = ABS_ERROR(resultPtr[3],
+                                                                 expected[3]);
+                                            }
+                                        }
+                                        if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
+                                        {
+                                            log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
+                                            float tempOut[4];
+                                            shouldReturn |= determine_validation_error_offset<float>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                     expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j],
+                                                                                                     norm_offset_x, norm_offset_y, norm_offset_z, j,
+                                                                                                     numTries, numClamped, true, lod );
+                                            log_error( "Step by step:\n" );
+                                            FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
+                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                              norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                              imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod);
+                                            log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",
+                                                      Ulp_Error( resultPtr[0], expected[0] ),
+                                                      Ulp_Error( resultPtr[1], expected[1] ),
+                                                      Ulp_Error( resultPtr[2], expected[2] ),
+                                                      Ulp_Error( resultPtr[3], expected[3] ),
+                                                      Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
+                                        } else {
+                                            log_error("Test error: we should have detected this passing above.\n");
+                                        }
+                                    }//norm_offset_z
+                                }//norm_offset_y
+                            }//norm_offset_x
+                            if( shouldReturn )
+                                return 1;
+                        } // if (!found_pixel)
+                        resultPtr += 4;
+                    }
+                }
+            }
+        }
+        /*
+         * UINT output type
+         */
+        else if( outputType == kUInt )
+        {
+            // Validate unsigned integer results
+            unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
+            unsigned int expected[4];
+            float error;
+            for( size_t z = 0, j = 0; z < depth_lod; z++ )
+            {
+                for( size_t y = 0; y < height_lod; y++ )
+                {
+                    for( size_t x = 0; x < width_lod; x++, j++ )
+                    {
+                        // Step 1: go through and see if the results verify for the pixel
+                        // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        int checkOnlyOnePixel = 0;
+                        int found_pixel = 0;
+                        for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                    // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0, 0.0)
+                                    // I.e., test one pixel.
+                                    if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                        norm_offset_x = 0.0f;
+                                        norm_offset_y = 0.0f;
+                                        norm_offset_z = 0.0f;
+                                        checkOnlyOnePixel = 1;
+                                    }
+                                    sample_image_pixel_offset<unsigned int>( imagePtr, imageInfo,
+                                                                            xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                            norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                            imageSampler, expected, lod );
+                                    error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                   errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                    if (error < MAX_ERR)
+                                        found_pixel = 1;
+                                }//norm_offset_z
+                            }//norm_offset_y
+                        }//norm_offset_x
+                        // Step 2: If we did not find a match, then print out debugging info.
+                        if (!found_pixel) {
+                            // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                            // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                            checkOnlyOnePixel = 0;
+                            int shouldReturn = 0;
+                            for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                    for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                        // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0, 0.0)
+                                        // I.e., test one pixel.
+                                        if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                            norm_offset_x = 0.0f;
+                                            norm_offset_y = 0.0f;
+                                            norm_offset_z = 0.0f;
+                                            checkOnlyOnePixel = 1;
+                                        }
+                                        sample_image_pixel_offset<unsigned int>( imagePtr, imageInfo,
+                                                                                xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                                norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                imageSampler, expected, lod );
+                                        error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                       errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                        if( error > MAX_ERR )
+                                        {
+                                            log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
+                                            shouldReturn |=  determine_validation_error_offset<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                             expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j],
+                                                                                                             norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                                             j, numTries, numClamped, false, lod );
+                                        } else {
+                                            log_error("Test error: we should have detected this passing above.\n");
+                                        }
+                                    }//norm_offset_z
+                                }//norm_offset_y
+                            }//norm_offset_x
+                            if( shouldReturn )
+                                return 1;
+                        } // if (!found_pixel)
+                        resultPtr += 4;
+                    }
+                }
+            }
+        }
+        else
+        /*
+         * INT output type
+         */
+        {
+            // Validate integer results
+            int *resultPtr = (int *)(char *)resultValues;
+            int expected[4];
+            float error;
+            for( size_t z = 0, j = 0; z < depth_lod; z++ )
+            {
+                for( size_t y = 0; y < height_lod; y++ )
+                {
+                    for( size_t x = 0; x < width_lod; x++, j++ )
+                    {
+                        // Step 1: go through and see if the results verify for the pixel
+                        // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                        // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                        int checkOnlyOnePixel = 0;
+                        int found_pixel = 0;
+                        for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                            for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                    // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0, 0.0)
+                                    // I.e., test one pixel.
+                                    if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
+                                        norm_offset_x = 0.0f;
+                                        norm_offset_y = 0.0f;
+                                        norm_offset_z = 0.0f;
+                                        checkOnlyOnePixel = 1;
+                                    }
+                                    sample_image_pixel_offset<int>( imagePtr, imageInfo,
+                                                                   xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                   norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                   imageSampler, expected, lod );
+                                    error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                   errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                    if (error < MAX_ERR)
+                                        found_pixel = 1;
+                                }//norm_offset_z
+                            }//norm_offset_y
+                        }//norm_offset_x
+                        // Step 2: If we did not find a match, then print out debugging info.
+                        if (!found_pixel) {
+                            // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
+                            // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
+                            checkOnlyOnePixel = 0;
+                            int shouldReturn = 0;
+                            for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
+                                for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
+                                    for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
+                                        // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0, 0.0)
+                                        // I.e., test one pixel.
+                                        if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0 || NORM_OFFSET == 0 || NORM_OFFSET == 0) {
+                                            norm_offset_x = 0.0f;
+                                            norm_offset_y = 0.0f;
+                                            norm_offset_z = 0.0f;
+                                            checkOnlyOnePixel = 1;
+                                        }
+                                        sample_image_pixel_offset<int>( imagePtr, imageInfo,
+                                                                       xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
+                                                                       norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                       imageSampler, expected, lod );
+                                        error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
+                                                       errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
+                                        if( error > MAX_ERR )
+                                        {
+                                            log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
+                                            shouldReturn |=  determine_validation_error_offset<int>( imagePtr, imageInfo, imageSampler, resultPtr,
+                                                                                                    expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j],
+                                                                                                    norm_offset_x, norm_offset_y, norm_offset_z,
+                                                                                                    j, numTries, numClamped, false, lod );
+                                        } else {
+                                            log_error("Test error: we should have detected this passing above.\n");
+                                        }
+                                    }//norm_offset_z
+                                }//norm_offset_y
+                            }//norm_offset_x
+                            if( shouldReturn )
+                                return 1;
+                        } // if (!found_pixel)
+                        resultPtr += 4;
+                        }
+                    }
+                }
+            }
+        }
+        {
+            nextLevelOffset += width_lod * height_lod * depth_lod * get_pixel_size(imageInfo->format);
+            width_lod = ( width_lod >> 1) ?( width_lod >> 1) : 1;
+            height_lod = ( height_lod >> 1) ?( height_lod >> 1) : 1;
+            depth_lod = ( depth_lod >> 1) ?( depth_lod >> 1) : 1;
+        }
+    }
+    return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
+int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                           bool floatCoords, ExplicitType outputType )
     char programSrc[10240];
     const char *ptr;
@@ -148,8 +1234,7 @@
             gTestMipmaps? ",lod":" ");
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
     // Run tests
@@ -169,9 +1254,7 @@
                     if( gDebugTrace )
                         log_info( "   at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth );
-                    int retCode = test_read_image(
-                        context, queue, kernel, &imageInfo, imageSampler,
-                        floatCoords, outputType, seed);
+                    int retCode = test_read_image_3D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
                     if( retCode )
                         return retCode;
@@ -198,9 +1281,7 @@
             log_info("Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ]);
             if( gDebugTrace )
                 log_info( "   at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
-            int retCode =
-                test_read_image(context, queue, kernel, &imageInfo,
-                                imageSampler, floatCoords, outputType, seed);
+            int retCode = test_read_image_3D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
             if( retCode )
                 return retCode;
@@ -214,9 +1295,7 @@
         imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
         imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
-        int retCode =
-            test_read_image(context, queue, kernel, &imageInfo, imageSampler,
-                            floatCoords, outputType, seed);
+        int retCode = test_read_image_3D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
         if( retCode )
             return retCode;
@@ -266,9 +1345,7 @@
                 if ( gTestMipmaps )
                     log_info( "   and number of mip levels :%d\n", (int)imageInfo.num_mip_levels );
-            int retCode =
-                test_read_image(context, queue, kernel, &imageInfo,
-                                imageSampler, floatCoords, outputType, seed);
+            int retCode = test_read_image_3D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
             if( retCode )
                 return retCode;
diff --git a/test_conformance/images/kernel_read_write/test_write_1D.cpp b/test_conformance/images/kernel_read_write/test_write_1D.cpp
index 41983ed..ca02262 100644
--- a/test_conformance/images/kernel_read_write/test_write_1D.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_1D.cpp
@@ -19,8 +19,12 @@
 #include <sys/mman.h>
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToSkip;
 extern cl_mem_flags gMemFlagsToUse;
 extern int gtestTypesToRun;
+extern bool gDeviceLt20;
 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor* imageInfo );
@@ -267,10 +271,8 @@
             clMemWrapper inputStream;
             char *imagePtrOffset = imageValues + nextLevelOffset;
-            inputStream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                         get_explicit_type_size(inputType) * 4
-                                             * width_lod,
-                                         imagePtrOffset, &error);
+            inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
+                                     get_explicit_type_size( inputType ) * 4 * width_lod, imagePtrOffset, &error );
             test_error( error, "Unable to create input buffer" );
             // Set arguments
@@ -470,18 +472,8 @@
                                         log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                     case CL_HALF_FLOAT:
-                                        log_error("    Expected: 0x%4.4x "
-                                                  "0x%4.4x 0x%4.4x 0x%4.4x\n",
-                                                  ((cl_half *)resultBuffer)[0],
-                                                  ((cl_half *)resultBuffer)[1],
-                                                  ((cl_half *)resultBuffer)[2],
-                                                  ((cl_half *)resultBuffer)[3]);
-                                        log_error("    Actual:   0x%4.4x "
-                                                  "0x%4.4x 0x%4.4x 0x%4.4x\n",
-                                                  ((cl_half *)resultPtr)[0],
-                                                  ((cl_half *)resultPtr)[1],
-                                                  ((cl_half *)resultPtr)[2],
-                                                  ((cl_half *)resultPtr)[3]);
+                                        log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                        log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                         log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                     case CL_UNSIGNED_INT32:
@@ -521,10 +513,7 @@
     return totalErrors;
-int test_write_image_1D_set(cl_device_id device, cl_context context,
-                            cl_command_queue queue,
-                            const cl_image_format *format,
-                            ExplicitType inputType, MTdata d)
+int test_write_image_1D_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d )
     char programSrc[10240];
     const char *ptr;
@@ -582,8 +571,7 @@
              gTestMipmaps ? ", lod" :"" );
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
     // Run tests
diff --git a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
index c771704..b91bf1c 100644
--- a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
@@ -19,8 +19,11 @@
 #include <sys/mman.h>
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToSkip;
 extern cl_mem_flags gMemFlagsToUse;
 extern int gtestTypesToRun;
+extern bool gDeviceLt20;
 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
@@ -282,10 +285,8 @@
             clMemWrapper inputStream;
             char *imagePtrOffset = imageValues + nextLevelOffset;
-            inputStream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                         get_explicit_type_size(inputType) * 4
-                                             * width_lod * imageInfo->arraySize,
-                                         imagePtrOffset, &error);
+            inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
+                                     get_explicit_type_size( inputType ) * 4 * width_lod * imageInfo->arraySize, imagePtrOffset, &error );
             test_error( error, "Unable to create input buffer" );
             // Set arguments
@@ -490,18 +491,8 @@
                                         log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                     case CL_HALF_FLOAT:
-                                        log_error("    Expected: 0x%4.4x "
-                                                  "0x%4.4x 0x%4.4x 0x%4.4x\n",
-                                                  ((cl_half *)resultBuffer)[0],
-                                                  ((cl_half *)resultBuffer)[1],
-                                                  ((cl_half *)resultBuffer)[2],
-                                                  ((cl_half *)resultBuffer)[3]);
-                                        log_error("    Actual:   0x%4.4x "
-                                                  "0x%4.4x 0x%4.4x 0x%4.4x\n",
-                                                  ((cl_half *)resultPtr)[0],
-                                                  ((cl_half *)resultPtr)[1],
-                                                  ((cl_half *)resultPtr)[2],
-                                                  ((cl_half *)resultPtr)[3]);
+                                        log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                        log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                         log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                     case CL_UNSIGNED_INT32:
@@ -542,10 +533,7 @@
-int test_write_image_1D_array_set(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  ExplicitType inputType, MTdata d)
+int test_write_image_1D_array_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d )
     char programSrc[10240];
     const char *ptr;
@@ -605,8 +593,7 @@
              gTestMipmaps ? ", lod" :"" );
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
     // Run tests
diff --git a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
index 08a7a80..4524c6c 100644
--- a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
@@ -19,8 +19,12 @@
 #include <sys/mman.h>
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToSkip;
 extern cl_mem_flags gMemFlagsToUse;
 extern int gtestTypesToRun;
+extern bool gDeviceLt20;
 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
@@ -301,11 +305,8 @@
             clMemWrapper inputStream;
             char *imagePtrOffset = imageValues + nextLevelOffset;
-            inputStream =
-                clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                               get_explicit_type_size(inputType) * 4 * width_lod
-                                   * height_lod * imageInfo->arraySize,
-                               imagePtrOffset, &error);
+            inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
+                                     get_explicit_type_size( inputType ) * 4 * width_lod * height_lod * imageInfo->arraySize, imagePtrOffset, &error );
             test_error( error, "Unable to create input buffer" );
             // Set arguments
@@ -513,20 +514,8 @@
                                             log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                         case CL_HALF_FLOAT:
-                                            log_error(
-                                                "    Expected: 0x%4.4x 0x%4.4x "
-                                                "0x%4.4x 0x%4.4x\n",
-                                                ((cl_half *)resultBuffer)[0],
-                                                ((cl_half *)resultBuffer)[1],
-                                                ((cl_half *)resultBuffer)[2],
-                                                ((cl_half *)resultBuffer)[3]);
-                                            log_error(
-                                                "    Actual:   0x%4.4x 0x%4.4x "
-                                                "0x%4.4x 0x%4.4x\n",
-                                                ((cl_half *)resultPtr)[0],
-                                                ((cl_half *)resultPtr)[1],
-                                                ((cl_half *)resultPtr)[2],
-                                                ((cl_half *)resultPtr)[3]);
+                                            log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                            log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                             log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                         case CL_UNSIGNED_INT32:
@@ -568,10 +557,7 @@
-int test_write_image_2D_array_set(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  ExplicitType inputType, MTdata d)
+int test_write_image_2D_array_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d )
     char programSrc[10240];
     const char *ptr;
@@ -632,8 +618,7 @@
              gTestMipmaps ? ", lod" : "" );
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
     // Run tests
diff --git a/test_conformance/images/kernel_read_write/test_write_3D.cpp b/test_conformance/images/kernel_read_write/test_write_3D.cpp
index 5cc96bb..7440bd6 100644
--- a/test_conformance/images/kernel_read_write/test_write_3D.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_3D.cpp
@@ -19,8 +19,12 @@
 #include <sys/mman.h>
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToSkip;
 extern cl_mem_flags gMemFlagsToUse;
 extern int gtestTypesToRun;
+extern bool gDeviceLt20;
 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
@@ -307,11 +311,8 @@
             clMemWrapper inputStream;
             char *imagePtrOffset = imageValues + nextLevelOffset;
-            inputStream =
-                clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                               get_explicit_type_size(inputType) * 4 * width_lod
-                                   * height_lod * depth_lod,
-                               imagePtrOffset, &error);
+            inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
+                                     get_explicit_type_size( inputType ) * 4 * width_lod * height_lod * depth_lod, imagePtrOffset, &error );
             test_error( error, "Unable to create input buffer" );
             // Set arguments
@@ -520,20 +521,8 @@
                                             log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                         case CL_HALF_FLOAT:
-                                            log_error(
-                                                "    Expected: 0x%4.4x 0x%4.4x "
-                                                "0x%4.4x 0x%4.4x\n",
-                                                ((cl_half *)resultBuffer)[0],
-                                                ((cl_half *)resultBuffer)[1],
-                                                ((cl_half *)resultBuffer)[2],
-                                                ((cl_half *)resultBuffer)[3]);
-                                            log_error(
-                                                "    Actual:   0x%4.4x 0x%4.4x "
-                                                "0x%4.4x 0x%4.4x\n",
-                                                ((cl_half *)resultPtr)[0],
-                                                ((cl_half *)resultPtr)[1],
-                                                ((cl_half *)resultPtr)[2],
-                                                ((cl_half *)resultPtr)[3]);
+                                            log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                            log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                             log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                         case CL_UNSIGNED_INT32:
@@ -576,10 +565,7 @@
-int test_write_image_3D_set(cl_device_id device, cl_context context,
-                            cl_command_queue queue,
-                            const cl_image_format *format,
-                            ExplicitType inputType, MTdata d)
+int test_write_image_3D_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d )
     char programSrc[10240];
     const char *ptr;
@@ -638,8 +624,7 @@
              gTestMipmaps ? ", lod" : "" );
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
     // Run tests
diff --git a/test_conformance/images/kernel_read_write/test_write_image.cpp b/test_conformance/images/kernel_read_write/test_write_image.cpp
index e40e80d..f6d9235 100644
--- a/test_conformance/images/kernel_read_write/test_write_image.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_image.cpp
@@ -19,28 +19,16 @@
 #include <sys/mman.h>
-extern bool gTestImage2DFromBuffer;
+extern bool            gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestImage2DFromBuffer, gTestMipmaps;
+extern cl_filter_mode    gFilterModeToSkip;
 extern cl_mem_flags gMemFlagsToUse;
 extern int gtestTypesToRun;
+extern bool gDeviceLt20;
-extern int test_write_image_1D_set(cl_device_id device, cl_context context,
-                                   cl_command_queue queue,
-                                   const cl_image_format *format,
-                                   ExplicitType inputType, MTdata d);
-extern int test_write_image_3D_set(cl_device_id device, cl_context context,
-                                   cl_command_queue queue,
-                                   const cl_image_format *format,
-                                   ExplicitType inputType, MTdata d);
-extern int test_write_image_1D_array_set(cl_device_id device,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         const cl_image_format *format,
-                                         ExplicitType inputType, MTdata d);
-extern int test_write_image_2D_array_set(cl_device_id device,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         const cl_image_format *format,
-                                         ExplicitType inputType, MTdata d);
+extern int test_write_image_1D_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d );
+extern int test_write_image_3D_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d );
+extern int test_write_image_1D_array_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d );
+extern int test_write_image_2D_array_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d );
 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
@@ -345,11 +333,8 @@
             char *imagePtrOffset = imageValues + nextLevelOffset;
-            inputStream =
-                clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                               get_explicit_type_size(inputType) * channel_scale
-                                   * width_lod * height_lod,
-                               imagePtrOffset, &error);
+            inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ),
+                                     get_explicit_type_size( inputType ) * channel_scale * width_lod * height_lod, imagePtrOffset, &error );
             test_error( error, "Unable to create input buffer" );
             // Set arguments
@@ -553,18 +538,8 @@
                                         log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                     case CL_HALF_FLOAT:
-                                        log_error("    Expected: 0x%4.4x "
-                                                  "0x%4.4x 0x%4.4x 0x%4.4x\n",
-                                                  ((cl_half *)resultBuffer)[0],
-                                                  ((cl_half *)resultBuffer)[1],
-                                                  ((cl_half *)resultBuffer)[2],
-                                                  ((cl_half *)resultBuffer)[3]);
-                                        log_error("    Actual:   0x%4.4x "
-                                                  "0x%4.4x 0x%4.4x 0x%4.4x\n",
-                                                  ((cl_half *)resultPtr)[0],
-                                                  ((cl_half *)resultPtr)[1],
-                                                  ((cl_half *)resultPtr)[2],
-                                                  ((cl_half *)resultPtr)[3]);
+                                        log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
+                                        log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
                                         log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
                                     case CL_UNSIGNED_INT32:
@@ -609,9 +584,7 @@
-int test_write_image_set(cl_device_id device, cl_context context,
-                         cl_command_queue queue, const cl_image_format *format,
-                         ExplicitType inputType, MTdata d)
+int test_write_image_set( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, ExplicitType inputType, MTdata d )
     char programSrc[10240];
     const char *ptr;
@@ -697,8 +670,7 @@
              gTestMipmaps ? ", lod" : "" );
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
     test_error( error, "Unable to create testing kernel" );
     // Run tests
@@ -813,13 +785,8 @@
     return 0;
-int test_write_image_formats(cl_device_id device, cl_context context,
-                             cl_command_queue queue,
-                             const std::vector<cl_image_format> &formatList,
-                             const std::vector<bool> &filterFlags,
-                             image_sampler_data *imageSampler,
-                             ExplicitType inputType,
-                             cl_mem_object_type imageType)
+int test_write_image_formats( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
+                             image_sampler_data *imageSampler, ExplicitType inputType, cl_mem_object_type imageType )
     if( imageSampler->filter_mode == CL_FILTER_LINEAR )
         // No need to run for linear filters
@@ -832,9 +799,9 @@
     RandomSeed seed( gRandomSeed );
-    for (unsigned int i = 0; i < formatList.size(); i++)
+    for( unsigned int i = 0; i < numFormats; i++ )
-        const cl_image_format &imageFormat = formatList[i];
+        cl_image_format &imageFormat = formatList[ i ];
         if( filterFlags[ i ] )
diff --git a/test_conformance/images/samplerlessReads/main.cpp b/test_conformance/images/samplerlessReads/main.cpp
index cd37779..cc882f3 100644
--- a/test_conformance/images/samplerlessReads/main.cpp
+++ b/test_conformance/images/samplerlessReads/main.cpp
@@ -13,11 +13,17 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "../harness/compat.h"
 #include <stdio.h>
 #include <string.h>
+#if !defined(_WIN32)
+#include <unistd.h>
+#include <sys/time.h>
 #include "../testBase.h"
-#include "../harness/compat.h"
 #include "../harness/fpcontrol.h"
 #include "../harness/parseParameters.h"
@@ -36,6 +42,9 @@
 cl_channel_type     gChannelTypeToUse = (cl_channel_type)-1;
 cl_channel_order    gChannelOrderToUse = (cl_channel_order)-1;
 bool                gEnablePitch = false;
+bool                gDeviceLt20 = false;
 static void printUsage( const char *execName );
@@ -151,8 +160,7 @@
     FPU_mode_type oldMode;
-    int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
-                                      false, 0, verifyImageSupport);
+    int ret = runTestHarness( argCount, argList, test_num, test_list, true, false, 0 );
     // Restore FP state before leaving
@@ -178,7 +186,7 @@
     log_info( "You may also use appropriate CL_ channel type and ordering constants.\n" );
     log_info( "\n" );
     log_info( "\tThe following modify the types of images tested:\n" );
-    log_info( "\t\tread_write - Runs the tests with read_write images which allow a kernel do both read and write to the same image \n" );
+    log_info( "\t\tread_write - Runs the tests with read_write images which allow a kernel do both read and write to the same image \n" );
     log_info( "\t\tsmall_images - Runs every format through a loop of widths 1-13 and heights 1-9, instead of random sizes\n" );
     log_info( "\t\tmax_images - Runs every format through a set of size combinations with the max values, max values - 1, and max values / 128\n" );
     log_info( "\n" );
diff --git a/test_conformance/images/samplerlessReads/test_iterations.cpp b/test_conformance/images/samplerlessReads/test_iterations.cpp
index 55eaaf4..857fbc6 100644
--- a/test_conformance/images/samplerlessReads/test_iterations.cpp
+++ b/test_conformance/images/samplerlessReads/test_iterations.cpp
@@ -22,7 +22,14 @@
     #include <setjmp.h>
-extern bool gTestReadWrite;
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool                 gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gDeviceLt20;
+extern bool                 gTestReadWrite;
+#define MAX_TRIES   1
+#define MAX_CLAMPED 1
 const char *read2DKernelSourcePattern =
 "__kernel void sample_kernel( read_only %s input, sampler_t sampler, __global int *results )\n"
@@ -176,11 +183,8 @@
     return 0;
-int test_read_image_set_2D(cl_device_id device, cl_context context,
-                           cl_command_queue queue,
-                           const cl_image_format *format,
-                           image_sampler_data *imageSampler,
-                           ExplicitType outputType)
+int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                            ExplicitType outputType )
     char programSrc[10240];
     const char *ptr;
@@ -197,11 +201,6 @@
     image_descriptor imageInfo = { 0 };
     size_t pixelSize;
-    if (gTestReadWrite && checkForReadWriteImageSupport(device))
-    {
-        return TEST_SKIPPED_ITSELF;
-    }
     imageInfo.format = format;
     imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0;
     imageInfo.type = CL_MEM_OBJECT_IMAGE2D;
@@ -256,8 +255,7 @@
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create testing kernel" );
     if ( gTestSmallImages )
diff --git a/test_conformance/images/samplerlessReads/test_loops.cpp b/test_conformance/images/samplerlessReads/test_loops.cpp
index db49a8f..e50d5d4 100644
--- a/test_conformance/images/samplerlessReads/test_loops.cpp
+++ b/test_conformance/images/samplerlessReads/test_loops.cpp
@@ -16,45 +16,24 @@
 #include "../testBase.h"
 #include "../common.h"
-extern int gTypesToTest;
-extern bool gTestReadWrite;
+extern int                  gTypesToTest;
+extern cl_channel_type      gChannelTypeToUse;
+extern cl_channel_order     gChannelOrderToUse;
-extern int test_read_image_set_1D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  image_sampler_data *imageSampler,
-                                  ExplicitType outputType);
-extern int test_read_image_set_1D_buffer(cl_device_id device,
-                                         cl_context context,
-                                         cl_command_queue queue,
-                                         const cl_image_format *format,
-                                         image_sampler_data *imageSampler,
-                                         ExplicitType outputType);
-extern int test_read_image_set_2D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  image_sampler_data *imageSampler,
-                                  ExplicitType outputType);
-extern int test_read_image_set_3D(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  image_sampler_data *imageSampler,
-                                  ExplicitType outputType);
-extern int test_read_image_set_1D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        const cl_image_format *format,
-                                        image_sampler_data *imageSampler,
-                                        ExplicitType outputType);
-extern int test_read_image_set_2D_array(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        const cl_image_format *format,
-                                        image_sampler_data *imageSampler,
-                                        ExplicitType outputType);
+extern bool                 gDebugTrace;
+extern bool                 gDeviceLt20;
-int test_read_image_type(cl_device_id device, cl_context context,
-                         cl_command_queue queue, const cl_image_format *format,
-                         image_sampler_data *imageSampler,
-                         ExplicitType outputType, cl_mem_object_type imageType)
+extern bool                 gTestReadWrite;
+extern int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType );
+extern int test_read_image_set_1D_buffer( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType );
+extern int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType );
+extern int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType );
+extern int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType );
+extern int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType );
+int test_read_image_type( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format,
+                          image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType )
     int ret = 0;
     imageSampler->addressing_mode = CL_ADDRESS_NONE;
@@ -95,25 +74,20 @@
     return ret;
-int test_read_image_formats(cl_device_id device, cl_context context,
-                            cl_command_queue queue,
-                            const std::vector<cl_image_format> &formatList,
-                            const std::vector<bool> &filterFlags,
-                            image_sampler_data *imageSampler,
-                            ExplicitType outputType,
-                            cl_mem_object_type imageType)
+int test_read_image_formats( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
+                             image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType )
     int ret = 0;
     imageSampler->normalized_coords = false;
     log_info( "read_image (%s coords, %s results) *****************************\n",
               "integer", get_explicit_type_name( outputType ) );
-    for (unsigned int i = 0; i < formatList.size(); i++)
+    for ( unsigned int i = 0; i < numFormats; i++ )
         if ( filterFlags[i] )
-        const cl_image_format &imageFormat = formatList[i];
+        cl_image_format &imageFormat = formatList[ i ];
         ret |= test_read_image_type( device, context, queue, &imageFormat, imageSampler, outputType, imageType );
@@ -127,50 +101,39 @@
     static int printedFormatList = -1;
     // Grab the list of supported image formats
-    std::vector<cl_image_format> formatList;
-    if (gTestReadWrite && checkForReadWriteImageSupport(device))
-    {
-        return TEST_SKIPPED_ITSELF;
+    cl_image_format *formatList;
+    bool *filterFlags;
+    unsigned int numFormats;
+    auto version = get_device_cl_version(device);
+    if (version < Version(2, 0)) {
+        gDeviceLt20 = true;
-    std::vector<cl_image_format> readOnlyFormats;
-    if (get_format_list(context, imageType, readOnlyFormats, CL_MEM_READ_ONLY))
+    if (gDeviceLt20 && gTestReadWrite) {
+        log_info("TEST skipped, OpenCL 2.0+ required for this test\n");
+        return ret;
+    }
+    // This flag is only for querying the list of supported formats
+    // The flag for creating image will be set explicitly in test functions
+    cl_mem_flags flags = (gTestReadWrite)? CL_MEM_KERNEL_READ_AND_WRITE : CL_MEM_READ_ONLY;
+    if ( get_format_list( context, imageType, formatList, numFormats, flags ) )
         return -1;
-    if (gTestReadWrite)
+    filterFlags = new bool[ numFormats ];
+    if ( filterFlags == NULL )
-        std::vector<cl_image_format> readWriteFormats;
-        if (get_format_list(context, imageType, readWriteFormats,
-                            CL_MEM_KERNEL_READ_AND_WRITE))
-            return -1;
-        // Keep only intersecting formats with read only and read write flags
-        for (unsigned int i = 0; i < readOnlyFormats.size(); i++)
-        {
-            for (unsigned int j = 0; j < readWriteFormats.size(); j++)
-            {
-                if (readOnlyFormats[i].image_channel_data_type
-                        == readWriteFormats[j].image_channel_data_type
-                    && readOnlyFormats[i].image_channel_order
-                        == readWriteFormats[j].image_channel_order)
-                {
-                    formatList.push_back(readOnlyFormats[i]);
-                    break;
-                }
-            }
-        }
+        log_error( "ERROR: Out of memory allocating filter flags list!\n" );
+        return -1;
-    else
-    {
-        formatList = readOnlyFormats;
-    }
+    memset( filterFlags, 0, sizeof( bool ) * numFormats );
     // First time through, we'll go ahead and print the formats supported, regardless of type
     if ( printedFormatList != (int)imageType )
         log_info( "---- Supported %s read formats for this device ---- \n", convert_image_type_to_string(imageType) );
-        for (unsigned int f = 0; f < formatList.size(); f++)
+        for ( unsigned int f = 0; f < numFormats; f++ )
             log_info( "  %-7s %-24s %d\n", GetChannelOrderName( formatList[ f ].image_channel_order ),
                       GetChannelTypeName( formatList[ f ].image_channel_data_type ),
                       (int)get_format_channel_count( &formatList[ f ] ) );
@@ -184,8 +147,9 @@
         if (gTypesToTest & test.type)
-            std::vector<bool> filterFlags(formatList.size(), false);
-            if (filter_formats(formatList, filterFlags, test.channelTypes) == 0)
+            if (filter_formats(formatList, filterFlags, numFormats,
+                               test.channelTypes)
+                == 0)
                 log_info("No formats supported for %s type\n",;
@@ -193,11 +157,14 @@
                 imageSampler.filter_mode = CL_FILTER_NEAREST;
                 ret += test_read_image_formats(
-                    device, context, queue, formatList, filterFlags,
+                    device, context, queue, formatList, filterFlags, numFormats,
                     &imageSampler, test.explicitType, imageType);
+    delete[] filterFlags;
+    delete[] formatList;
     return ret;
diff --git a/test_conformance/images/samplerlessReads/test_read_1D.cpp b/test_conformance/images/samplerlessReads/test_read_1D.cpp
index aa261b7..173bc6f 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D.cpp
@@ -22,7 +22,14 @@
     #include <setjmp.h>
-extern bool gTestReadWrite;
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool                 gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gDeviceLt20;
+extern bool                 gTestReadWrite;
+#define MAX_TRIES   1
+#define MAX_CLAMPED 1
 const char *read1DKernelSourcePattern =
 "__kernel void sample_kernel( read_only image1d_t input, sampler_t sampler, __global int *results )\n"
@@ -177,11 +184,8 @@
     return 0;
-int test_read_image_set_1D(cl_device_id device, cl_context context,
-                           cl_command_queue queue,
-                           const cl_image_format *format,
-                           image_sampler_data *imageSampler,
-                           ExplicitType outputType)
+int test_read_image_set_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                            ExplicitType outputType )
     char programSrc[10240];
     const char *ptr;
@@ -192,11 +196,6 @@
     RandomSeed seed( gRandomSeed );
     int error;
-    if (gTestReadWrite && checkForReadWriteImageSupport(device))
-    {
-        return TEST_SKIPPED_ITSELF;
-    }
     // Get our operating params
     size_t maxWidth;
     cl_ulong maxAllocSize, memSize;
@@ -254,8 +253,7 @@
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create testing kernel" );
     if ( gTestSmallImages )
diff --git a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
index fb0c263..503a161 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
@@ -22,7 +22,14 @@
     #include <setjmp.h>
-extern bool gTestReadWrite;
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool                 gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gDeviceLt20;
+extern bool                 gTestReadWrite;
+#define MAX_TRIES   1
+#define MAX_CLAMPED 1
 const char *read1DArrayKernelSourcePattern =
 "__kernel void sample_kernel( read_only image1d_array_t input, sampler_t sampler, __global int *results )\n"
@@ -166,8 +173,6 @@
-    clReleaseMemObject(read_only_image);
@@ -175,11 +180,8 @@
     return 0;
-int test_read_image_set_1D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 const cl_image_format *format,
-                                 image_sampler_data *imageSampler,
-                                 ExplicitType outputType)
+int test_read_image_set_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                            ExplicitType outputType )
     char programSrc[10240];
     const char *ptr;
@@ -196,11 +198,6 @@
     image_descriptor imageInfo = { 0 };
     size_t pixelSize;
-    if (gTestReadWrite && checkForReadWriteImageSupport(device))
-    {
-        return TEST_SKIPPED_ITSELF;
-    }
     imageInfo.format = format;
     imageInfo.height = imageInfo.depth = 0;
     imageInfo.type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
@@ -253,8 +250,7 @@
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create testing kernel" );
     if ( gTestSmallImages )
diff --git a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
index 7a3084d..4cd02f0 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
@@ -22,6 +22,13 @@
     #include <setjmp.h>
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool             gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gDeviceLt20;
+#define MAX_TRIES   1
+#define MAX_CLAMPED 1
 const char *read1DBufferKernelSourcePattern =
 "__kernel void sample_kernel( read_only image1d_buffer_t inputA, read_only image1d_t inputB, sampler_t sampler, __global int *results )\n"
@@ -160,11 +167,8 @@
     return 0;
-int test_read_image_set_1D_buffer(cl_device_id device, cl_context context,
-                                  cl_command_queue queue,
-                                  const cl_image_format *format,
-                                  image_sampler_data *imageSampler,
-                                  ExplicitType outputType)
+int test_read_image_set_1D_buffer( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
+                            ExplicitType outputType )
     char programSrc[10240];
     const char *ptr;
@@ -246,8 +250,7 @@
              readFormat );
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create testing kernel" );
     if ( gTestSmallImages )
diff --git a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
index 99f2426..22fcffc 100644
--- a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
@@ -16,7 +16,11 @@
 #include "../testBase.h"
 #include <float.h>
-extern bool gTestReadWrite;
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool             gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gDeviceLt20;
+extern bool             gTestReadWrite;
 const char *read2DArrayKernelSourcePattern =
 "__kernel void sample_kernel( read_only %s input, sampler_t sampler, __global int *results )\n"
@@ -161,11 +165,8 @@
     return 0;
-int test_read_image_set_2D_array(cl_device_id device, cl_context context,
-                                 cl_command_queue queue,
-                                 const cl_image_format *format,
-                                 image_sampler_data *imageSampler,
-                                 ExplicitType outputType)
+int test_read_image_set_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format,
+                                  image_sampler_data *imageSampler, ExplicitType outputType )
     char programSrc[10240];
     const char *ptr;
@@ -175,11 +176,6 @@
     int error;
-    if (gTestReadWrite && checkForReadWriteImageSupport(device))
-    {
-        return TEST_SKIPPED_ITSELF;
-    }
     clProgramWrapper program;
     clKernelWrapper kernel;
@@ -243,8 +239,7 @@
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create testing kernel" );
diff --git a/test_conformance/images/samplerlessReads/test_read_3D.cpp b/test_conformance/images/samplerlessReads/test_read_3D.cpp
index cf41140..142d722 100644
--- a/test_conformance/images/samplerlessReads/test_read_3D.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_3D.cpp
@@ -16,7 +16,11 @@
 #include "../testBase.h"
 #include <float.h>
-extern bool gTestReadWrite;
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+extern bool             gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gDeviceLt20;
+extern bool             gTestReadWrite;
 const char *read3DKernelSourcePattern =
 "__kernel void sample_kernel( read_only image3d_t input, sampler_t sampler, __global int *results )\n"
@@ -164,11 +168,8 @@
     return 0;
-int test_read_image_set_3D(cl_device_id device, cl_context context,
-                           cl_command_queue queue,
-                           const cl_image_format *format,
-                           image_sampler_data *imageSampler,
-                           ExplicitType outputType)
+int test_read_image_set_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format,
+                            image_sampler_data *imageSampler, ExplicitType outputType )
     char programSrc[10240];
     const char *ptr;
@@ -178,11 +179,6 @@
     int error;
-    if (gTestReadWrite && checkForReadWriteImageSupport(device))
-    {
-        return TEST_SKIPPED_ITSELF;
-    }
     clProgramWrapper program;
     clKernelWrapper kernel;
@@ -246,8 +242,7 @@
     ptr = programSrc;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
-                                        "sample_kernel");
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0" );
     test_error( error, "Unable to create testing kernel" );
diff --git a/test_conformance/images/testBase.h b/test_conformance/images/testBase.h
index ad48f10..f50f6b5 100644
--- a/test_conformance/images/testBase.h
+++ b/test_conformance/images/testBase.h
@@ -22,12 +22,6 @@
 #include "harness/clImageHelper.h"
 #include "harness/imageHelpers.h"
-extern bool gDebugTrace;
-extern bool gTestSmallImages;
-extern bool gEnablePitch;
-extern bool gTestMaxImages;
-extern bool gTestMipmaps;
 // Amount to offset pixels for checking normalized reads
 #define NORM_OFFSET 0.1f
@@ -64,22 +58,19 @@
     kAllTests = ( kReadTests | kWriteTests | kReadWriteTests )
-typedef int (*test_format_set_fn)(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    const std::vector<cl_image_format> &formatList,
-    const std::vector<bool> &filterFlags, image_sampler_data *imageSampler,
-    ExplicitType outputType, cl_mem_object_type imageType);
+typedef int (*test_format_set_fn)( cl_device_id device, cl_context context, cl_command_queue queue,
+  cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
+  image_sampler_data *imageSampler, ExplicitType outputType,
+  cl_mem_object_type imageType );
-extern int test_read_image_formats(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    const std::vector<cl_image_format> &formatList,
-    const std::vector<bool> &filterFlags, image_sampler_data *imageSampler,
-    ExplicitType outputType, cl_mem_object_type imageType);
-extern int test_write_image_formats(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    const std::vector<cl_image_format> &formatList,
-    const std::vector<bool> &filterFlags, image_sampler_data *imageSampler,
-    ExplicitType outputType, cl_mem_object_type imageType);
+extern int test_read_image_formats( cl_device_id device, cl_context context, cl_command_queue queue,
+  cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
+  image_sampler_data *imageSampler, ExplicitType outputType,
+  cl_mem_object_type imageType );
+extern int test_write_image_formats( cl_device_id device, cl_context context, cl_command_queue queue,
+  cl_image_format *formatList, bool *filterFlags, unsigned int numFormats,
+  image_sampler_data *imageSampler, ExplicitType outputType,
+  cl_mem_object_type imageType );
 #endif // _testBase_h
diff --git a/test_conformance/integer_ops/main.cpp b/test_conformance/integer_ops/main.cpp
index 00e9166..1a8bad5 100644
--- a/test_conformance/integer_ops/main.cpp
+++ b/test_conformance/integer_ops/main.cpp
@@ -212,6 +212,6 @@
 int main(int argc, const char *argv[])
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
diff --git a/test_conformance/integer_ops/test_int_basic_ops.cpp b/test_conformance/integer_ops/test_int_basic_ops.cpp
index 519e5be..2d628d4 100644
--- a/test_conformance/integer_ops/test_int_basic_ops.cpp
+++ b/test_conformance/integer_ops/test_int_basic_ops.cpp
@@ -244,21 +244,18 @@
     (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
     pThis->m_output_ptr =
     (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
-    pThis->m_streams[0] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        pThis->m_type_size * num_elements * inputAVecSize, NULL, &err);
+    pThis->m_streams[0] =
+    clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), pThis->m_type_size * num_elements * inputAVecSize, NULL, &err);
     test_error(err, "clCreateBuffer failed");
-    pThis->m_streams[1] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        pThis->m_type_size * num_elements * inputBVecSize, NULL, &err);
+    pThis->m_streams[1] =
+    clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), pThis->m_type_size * num_elements * inputBVecSize, NULL, &err );
     test_error(err, "clCreateBuffer failed");
-    pThis->m_streams[2] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        pThis->m_type_size * num_elements * vectorSize, NULL, &err);
+    pThis->m_streams[2] =
+    clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), pThis->m_type_size * num_elements * vectorSize, NULL, &err );
     test_error(err, "clCreateBuffer failed");
@@ -1448,21 +1445,13 @@
     generate_random_data( type, num_elements * inputBVecSize, s_randStates, input_ptr[ 1 ] );
     generate_random_bool_data( num_elements * inputCVecSize, s_randStates, (cl_char *)input_ptr[ 2 ], type_size );
-    streams[0] = clCreateBuffer(
-        context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-        type_size * num_elements * inputAVecSize, input_ptr[0], &err);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), type_size * num_elements * inputAVecSize, input_ptr[0], &err);
     test_error(err, "clCreateBuffer failed");
-    streams[1] = clCreateBuffer(
-        context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-        type_size * num_elements * inputBVecSize, input_ptr[1], &err);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), type_size * num_elements * inputBVecSize, input_ptr[1], &err );
     test_error(err, "clCreateBuffer failed");
-    streams[2] = clCreateBuffer(
-        context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-        type_size * num_elements * inputCVecSize, input_ptr[2], &err);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), type_size * num_elements * inputCVecSize, input_ptr[2], &err );
     test_error(err, "clCreateBuffer failed");
-    streams[3] =
-        clCreateBuffer(context, CL_MEM_WRITE_ONLY,
-                       type_size * num_elements * vectorSize, NULL, &err);
+    streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_WRITE_ONLY), type_size * num_elements * vectorSize, NULL, &err );
     test_error(err, "clCreateBuffer failed");
     const char *vectorString = sizeNames[ vectorSize ];
diff --git a/test_conformance/integer_ops/test_integers.cpp b/test_conformance/integer_ops/test_integers.cpp
index 8d77b24..620582c 100644
--- a/test_conformance/integer_ops/test_integers.cpp
+++ b/test_conformance/integer_ops/test_integers.cpp
@@ -84,10 +84,11 @@
                 useOpKernel ? fnName : "", useOpKernel ? "" : fnName, sizeName,
                 sizeName );
+    bool isOpenCL20Function = (strcmp(fnName,"ctz") == 0)? true: false;
     /* Create kernels */
     programPtr = kernelSource;
-    if (create_single_kernel_helper(context, &program, &kernel, 1,
-                                    (const char **)&programPtr, "sample_test"))
+    if( create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test", isOpenCL20Function ? "-cl-std=CL2.0": "" ) )
         log_error("The program we attempted to compile was: \n%s\n", kernelSource);
         return -1;
@@ -96,9 +97,9 @@
     /* Generate some streams */
     generate_random_data( vecType, vecSize * TEST_SIZE, d, inDataA );
-    streams[0] = clCreateBuffer(
-        context, CL_MEM_COPY_HOST_PTR,
-        get_explicit_type_size(vecType) * vecSize * TEST_SIZE, inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
+                                get_explicit_type_size( vecType ) * vecSize * TEST_SIZE,
+                                inDataA, NULL);
     if( streams[0] == NULL )
         log_error("ERROR: Creating input array A failed!\n");
@@ -110,10 +111,9 @@
         // Op kernels use an r/w buffer for the second param, so we need to init it with data
         generate_random_data( vecType, vecSize * TEST_SIZE, d, inDataB );
-    streams[1] = clCreateBuffer(
-        context, (CL_MEM_READ_WRITE | (useOpKernel ? CL_MEM_COPY_HOST_PTR : 0)),
-        get_explicit_type_size(vecType) * vecSize * TEST_SIZE,
-        (useOpKernel) ? &inDataB : NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | ( useOpKernel ? CL_MEM_COPY_HOST_PTR : 0 )),
+                                 get_explicit_type_size( vecType ) * vecSize * TEST_SIZE,
+                                ( useOpKernel ) ? &inDataB : NULL, NULL );
     if( streams[1] == NULL )
         log_error("ERROR: Creating output array failed!\n");
@@ -668,25 +668,25 @@
     generate_random_data( vecAType, vecSize * TEST_SIZE, d, inDataA );
     generate_random_data( vecBType, vecSize * TEST_SIZE, d, inDataB );
-    streams[0] = clCreateBuffer(
-        context, CL_MEM_COPY_HOST_PTR,
-        get_explicit_type_size(vecAType) * vecSize * TEST_SIZE, &inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
+                                get_explicit_type_size( vecAType ) * vecSize * TEST_SIZE,
+                                &inDataA, NULL);
     if( streams[0] == NULL )
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
-    streams[1] = clCreateBuffer(
-        context, CL_MEM_COPY_HOST_PTR,
-        get_explicit_type_size(vecBType) * vecSize * TEST_SIZE, &inDataB, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
+                                get_explicit_type_size( vecBType ) * vecSize * TEST_SIZE,
+                                &inDataB, NULL);
     if( streams[1] == NULL )
         log_error("ERROR: Creating input array B failed!\n");
         return -1;
-    streams[2] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        get_explicit_type_size(vecAType) * vecSize * TEST_SIZE, NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),
+                                 get_explicit_type_size( vecAType ) * vecSize * TEST_SIZE,
+                                 NULL, NULL );
     if( streams[2] == NULL )
         log_error("ERROR: Creating output array failed!\n");
@@ -1325,33 +1325,25 @@
     generate_random_data( vecBType, vecSize * TEST_SIZE, d, inDataB );
     generate_random_data( vecCType, vecSize * TEST_SIZE, d, inDataC );
-    streams[0] = clCreateBuffer(
-        context, CL_MEM_COPY_HOST_PTR,
-        get_explicit_type_size(vecAType) * vecSize * TEST_SIZE, &inDataA, NULL);
+    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecAType ) * vecSize * TEST_SIZE, &inDataA, NULL);
     if( streams[0] == NULL )
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
-    streams[1] = clCreateBuffer(
-        context, CL_MEM_COPY_HOST_PTR,
-        get_explicit_type_size(vecBType) * vecSize * TEST_SIZE, &inDataB, NULL);
+    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecBType ) * vecSize * TEST_SIZE, &inDataB, NULL);
     if( streams[1] == NULL )
         log_error("ERROR: Creating input array B failed!\n");
         return -1;
-    streams[2] = clCreateBuffer(
-        context, CL_MEM_COPY_HOST_PTR,
-        get_explicit_type_size(vecCType) * vecSize * TEST_SIZE, &inDataC, NULL);
+    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecCType ) * vecSize * TEST_SIZE, &inDataC, NULL);
     if( streams[2] == NULL )
         log_error("ERROR: Creating input array C failed!\n");
         return -1;
-    streams[3] = clCreateBuffer(
-        context, CL_MEM_READ_WRITE,
-        get_explicit_type_size(destType) * vecSize * TEST_SIZE, NULL, NULL);
+    streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( destType ) * vecSize * TEST_SIZE, NULL, NULL );
     if( streams[3] == NULL )
         log_error("ERROR: Creating output array failed!\n");
diff --git a/test_conformance/integer_ops/test_unary_ops.cpp b/test_conformance/integer_ops/test_unary_ops.cpp
index 72940ea..0b4d0b8 100644
--- a/test_conformance/integer_ops/test_unary_ops.cpp
+++ b/test_conformance/integer_ops/test_unary_ops.cpp
@@ -90,9 +90,9 @@
     // Generate two streams. The first is our random data to test against, the second is our control stream
     generate_random_data( vecType, vecSize * TEST_SIZE, d, inData );
-    streams[0] = clCreateBuffer(
-        context, CL_MEM_COPY_HOST_PTR,
-        get_explicit_type_size(vecType) * vecSize * TEST_SIZE, inData, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
+                                 get_explicit_type_size( vecType ) * vecSize * TEST_SIZE,
+                                 inData, &error );
     test_error( error, "Creating input data array failed" );
     cl_uint bits;
@@ -110,8 +110,8 @@
             // For addition ops, the MAX control value is 1. Otherwise, it's 3
             controlData[ i ] &= ~0x02;
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(controlData), controlData, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
+                                sizeof( controlData ), controlData, &error );
     test_error( error, "Unable to create control stream" );
     // Assign streams and execute
diff --git a/test_conformance/integer_ops/test_upsample.cpp b/test_conformance/integer_ops/test_upsample.cpp
index 9ae3f0c..2fbbcc4 100644
--- a/test_conformance/integer_ops/test_upsample.cpp
+++ b/test_conformance/integer_ops/test_upsample.cpp
@@ -92,24 +92,19 @@
     /* Set up parameters */
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sourceATypeSize * sourceAVecSize * count, sourceA, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sourceATypeSize * sourceAVecSize * count, sourceA, NULL );
     if (!streams[0])
         log_error("ERROR: Creating input array A failed!\n");
         return -1;
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sourceBTypeSize * sourceBVecSize * count, sourceB, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sourceBTypeSize * sourceBVecSize * count, sourceB, NULL );
     if (!streams[1])
         log_error("ERROR: Creating input array B failed!\n");
         return -1;
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, outStride * count,
-                                NULL, NULL);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), outStride * count, NULL, NULL );
     if (!streams[2])
         log_error("ERROR: Creating output array failed!\n");
diff --git a/test_conformance/integer_ops/verification_and_generation_functions.cpp b/test_conformance/integer_ops/verification_and_generation_functions.cpp
index 25fbe71..23f3bdb 100644
--- a/test_conformance/integer_ops/verification_and_generation_functions.cpp
+++ b/test_conformance/integer_ops/verification_and_generation_functions.cpp
@@ -1481,8 +1481,8 @@
 verify_uchar(int test, size_t vector_size, cl_uchar *inptrA, cl_uchar *inptrB, cl_uchar *outptr, size_t n)
     cl_uchar r;
-    cl_uint shift_mask = vector_size == 1 ? (cl_uint)(sizeof(cl_uint) * 8) - 1
-                                          : (cl_uint)(sizeof(cl_uchar) * 8) - 1;
+    cl_uint  shift_mask = vector_size == 1 ? (cl_uint)(sizeof(cl_uint)*8)-1
+    : (cl_uint)(sizeof(cl_uchar)*8)-1;;
     size_t   i, j;
     int      count=0;
diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt
index d8dfc40..8818039 100644
--- a/test_conformance/math_brute_force/CMakeLists.txt
+++ b/test_conformance/math_brute_force/CMakeLists.txt
@@ -1,37 +1,35 @@
-    binary_double.cpp
-    binary_float.cpp
-    binary_i_double.cpp
-    binary_i_float.cpp
-    binary_operator_double.cpp
-    binary_operator_float.cpp
-    binary_two_results_i_double.cpp
-    binary_two_results_i_float.cpp
-    function_list.cpp
-    i_unary_double.cpp
-    i_unary_float.cpp
-    macro_binary_double.cpp
-    macro_binary_float.cpp
-    macro_unary_double.cpp
-    macro_unary_float.cpp
-    mad_double.cpp
-    mad_float.cpp
+    FunctionList.cpp
+    Sleep.cpp
+    binary.cpp
+    binaryOperator.cpp
+    Utility.cpp
+    binary_i.cpp
+    binary_two_results_i.cpp
+    i_unary.cpp
+    macro_binary.cpp
+    macro_unary.cpp
+    mad.cpp
-    sleep.cpp
-    ternary_double.cpp
-    ternary_float.cpp
-    unary_double.cpp
-    unary_float.cpp
-    unary_two_results_double.cpp
-    unary_two_results_float.cpp
-    unary_two_results_i_double.cpp
-    unary_two_results_i_float.cpp
-    unary_u_double.cpp
-    unary_u_float.cpp
-    utility.cpp
+    ternary.cpp
+    unary.cpp
+    unary_two_results.cpp
+    unary_two_results_i.cpp
+    unary_u.cpp
+    COMPILE_FLAGS -march=i686)
+        COMPILE_FLAGS -O0)
diff --git a/test_conformance/math_brute_force/FunctionList.cpp b/test_conformance/math_brute_force/FunctionList.cpp
new file mode 100644
index 0000000..30ee551
--- /dev/null
+++ b/test_conformance/math_brute_force/FunctionList.cpp
@@ -0,0 +1,205 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "FunctionList.h"
+#include "reference_math.h"
+#define FTZ_ON  1
+#define FTZ_OFF 0
+#define EXACT    0.0f
+#define RELAXED_ON 1
+#define RELAXED_OFF 0
+#define STRINGIFY( _s)                  #_s
+// Only use ulps information in spir test
+#define ENTRY(      _name, _ulp, _embedded_ulp, _rmode, _type )                 { STRINGIFY(_name), STRINGIFY(_name),                 {NULL}, {NULL}, {NULL}, _ulp, _ulp, _embedded_ulp, INFINITY,     _rmode, RELAXED_OFF, _type }
+#define ENTRY_EXT(  _name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type )   { STRINGIFY(_name), STRINGIFY(_name),                 {NULL}, {NULL}, {NULL}, _ulp, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, RELAXED_ON,  _type }
+#define HALF_ENTRY( _name, _ulp, _embedded_ulp, _rmode, _type )                 { "half_" STRINGIFY(_name), "half_" STRINGIFY(_name), {NULL}, {NULL}, {NULL}, _ulp, _ulp, _embedded_ulp, INFINITY,     _rmode, RELAXED_OFF, _type }
+#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type)    { STRINGIFY(_name), _operator,                        {NULL}, {NULL}, {NULL}, _ulp, _ulp, _embedded_ulp, INFINITY,     _rmode, RELAXED_OFF, _type }
+#define unaryF                NULL
+#define i_unaryF              NULL
+#define unaryF_u              NULL
+#define macro_unaryF          NULL
+#define binaryF               NULL
+#define binaryF_nextafter     NULL
+#define binaryOperatorF       NULL
+#define binaryF_i             NULL
+#define macro_binaryF         NULL
+#define ternaryF              NULL
+#define unaryF_two_results    NULL
+#define unaryF_two_results_i  NULL
+#define binaryF_two_results_i NULL
+#define mad_function          NULL
+#define reference_sqrt        NULL
+#define reference_sqrtl       NULL
+#define reference_divide      NULL
+#define reference_dividel     NULL
+#define reference_relaxed_divide NULL
+#define ENTRY(      _name, _ulp, _embedded_ulp, _rmode, _type )                 { STRINGIFY(_name), STRINGIFY(_name),                 {(void*)reference_##_name}, {(void*)reference_##_name##l}, {(void*)reference_##_name},           _ulp, _ulp, _embedded_ulp, INFINITY,     _rmode, RELAXED_OFF, _type }
+#define ENTRY_EXT(  _name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type )   { STRINGIFY(_name), STRINGIFY(_name),                 {(void*)reference_##_name}, {(void*)reference_##_name##l}, {(void*)reference_##relaxed_##_name}, _ulp, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, RELAXED_ON,  _type }
+#define HALF_ENTRY( _name, _ulp, _embedded_ulp, _rmode, _type )                 { "half_" STRINGIFY(_name), "half_" STRINGIFY(_name), {(void*)reference_##_name}, {NULL}, {NULL},                   _ulp, _ulp, _embedded_ulp, INFINITY, _rmode, RELAXED_OFF, _type }
+#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type)    { STRINGIFY(_name), _operator,                        {(void*)reference_##_name}, {(void*)reference_##_name##l}, {NULL},                               _ulp, _ulp, _embedded_ulp, INFINITY,     _rmode, RELAXED_OFF, _type }
+extern const vtbl _unary;               // float foo( float )
+extern const vtbl _unary_u;             // float foo( uint ),  double foo( ulong )
+extern const vtbl _i_unary;             // int foo( float )
+extern const vtbl _macro_unary;         // int foo( float ),  returns {0,1} for scalar, { 0, -1 } for vector
+extern const vtbl _binary;              // float foo( float, float )
+extern const vtbl _binary_nextafter;    // float foo( float, float ), special handling for nextafter
+extern const vtbl _binary_operator;     // float .op. float
+extern const vtbl _macro_binary;        // int foo( float, float ), returns {0,1} for scalar, { 0, -1 } for vector
+extern const vtbl _binary_i;            // float foo( float, int )
+extern const vtbl _ternary;             // float foo( float, float, float )
+extern const vtbl _unary_two_results;   // float foo( float, float * )
+extern const vtbl _unary_two_results_i; // float foo( float, int * )
+extern const vtbl _binary_two_results_i; // float foo( float, float, int * )
+extern const vtbl _mad_tbl;             // float mad( float, float, float )
+#define unaryF &_unary
+#define i_unaryF &_i_unary
+#define unaryF_u  &_unary_u
+#define macro_unaryF &_macro_unary
+#define binaryF &_binary
+#define binaryF_nextafter &_binary_nextafter
+#define binaryOperatorF &_binary_operator
+#define binaryF_i &_binary_i
+#define macro_binaryF &_macro_binary
+#define ternaryF &_ternary
+#define unaryF_two_results  &_unary_two_results
+#define unaryF_two_results_i  &_unary_two_results_i
+#define binaryF_two_results_i  &_binary_two_results_i
+#define mad_function        &_mad_tbl
+const Func  functionList[] = {
+                                    ENTRY( acos,                  4.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( acosh,                 4.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( acospi,                5.0f,         5.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( asin,                  4.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( asinh,                 4.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( asinpi,                5.0f,         5.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( atan,                  5.0f,         5.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( atanh,                 5.0f,         5.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( atanpi,                5.0f,         5.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( atan2,                 6.0f,         6.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY( atan2pi,               6.0f,         6.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY( cbrt,                  2.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( ceil,                  0.0f,         0.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( copysign,              0.0f,         0.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY_EXT( cos,               4.0f,         4.0f,        0.00048828125f,        FTZ_OFF,     unaryF), //relaxed ulp 2^-11
+                                    ENTRY( cosh,                  4.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( cospi,                 4.0f,         4.0f,         FTZ_OFF,     unaryF),
+//                                  ENTRY( erfc,                  16.0f,         16.0f,         FTZ_OFF,     unaryF), //disabled for 1.0 due to lack of reference implementation
+//                                  ENTRY( erf,                   16.0f,         16.0f,         FTZ_OFF,     unaryF), //disabled for 1.0 due to lack of reference implementation
+                                    ENTRY_EXT( exp,               3.0f,         4.0f,       3.0f,       FTZ_OFF,    unaryF), //relaxed error is actually overwritten in unary.c as it is 3+floor(fabs(2*x))
+                                    ENTRY_EXT( exp2,              3.0f,         4.0f,       3.0f,       FTZ_OFF,    unaryF), //relaxed error is actually overwritten in unary.c as it is 3+floor(fabs(2*x))
+                                    ENTRY_EXT( exp10,             3.0f,         4.0f,       8192.0f,    FTZ_OFF,    unaryF), //relaxed error is actually overwritten in unary.c as it is 3+floor(fabs(2*x)) in derived mode,
+                                    // in non-derived mode it uses the ulp error for half_exp10.
+                                    ENTRY( expm1,                 3.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( fabs,                  0.0f,         0.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( fdim,                  0.0f,         0.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY( floor,                 0.0f,         0.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( fma,                   0.0f,         0.0f,         FTZ_OFF,     ternaryF),
+                                    ENTRY( fmax,                  0.0f,         0.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY( fmin,                  0.0f,         0.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY( fmod,                  0.0f,         0.0f,         FTZ_OFF,     binaryF ),
+                                    ENTRY( fract,                 0.0f,         0.0f,         FTZ_OFF,     unaryF_two_results),
+                                    ENTRY( frexp,                 0.0f,         0.0f,         FTZ_OFF,     unaryF_two_results_i),
+                                    ENTRY( hypot,                 4.0f,         4.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY( ilogb,                 0.0f,         0.0f,         FTZ_OFF,     i_unaryF),
+                                    ENTRY( isequal,               0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( isfinite,              0.0f,         0.0f,         FTZ_OFF,     macro_unaryF),
+                                    ENTRY( isgreater,             0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( isgreaterequal,        0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( isinf,                 0.0f,         0.0f,         FTZ_OFF,     macro_unaryF),
+                                    ENTRY( isless,                0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( islessequal,           0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( islessgreater,         0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( isnan,                 0.0f,         0.0f,         FTZ_OFF,     macro_unaryF),
+                                    ENTRY( isnormal,              0.0f,         0.0f,         FTZ_OFF,     macro_unaryF),
+                                    ENTRY( isnotequal,            0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( isordered,             0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( isunordered,           0.0f,         0.0f,         FTZ_OFF,     macro_binaryF),
+                                    ENTRY( ldexp,                 0.0f,         0.0f,         FTZ_OFF,     binaryF_i),
+                                    ENTRY( lgamma,            INFINITY,     INFINITY,         FTZ_OFF,     unaryF),
+                                    ENTRY( lgamma_r,          INFINITY,     INFINITY,         FTZ_OFF,     unaryF_two_results_i),
+                                    ENTRY_EXT( log,               3.0f,         4.0f,       4.76837158203125e-7f,   FTZ_OFF,    unaryF), //relaxed ulp 2^-21
+                                    ENTRY_EXT( log2,              3.0f,         4.0f,       4.76837158203125e-7f,   FTZ_OFF,    unaryF), //relaxed ulp 2^-21
+                                    ENTRY( log10,                 3.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( log1p,                 2.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( logb,                  0.0f,         0.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY_EXT( mad,           INFINITY,     INFINITY,        INFINITY,    FTZ_OFF,    mad_function), //in fast-relaxed-math mode it has to be either exactly rounded fma or exactly rounded a*b+c
+                                    ENTRY( maxmag,                0.0f,         0.0f,         FTZ_OFF,    binaryF ),
+                                    ENTRY( minmag,                0.0f,         0.0f,         FTZ_OFF,    binaryF ),
+                                    ENTRY( modf,                  0.0f,         0.0f,         FTZ_OFF,     unaryF_two_results ),
+                                    ENTRY( nan,                   0.0f,         0.0f,         FTZ_OFF,     unaryF_u),
+                                    ENTRY( nextafter,             0.0f,         0.0f,         FTZ_OFF,     binaryF_nextafter),
+                                    ENTRY_EXT( pow,              16.0f,        16.0f,         8192.0f,     FTZ_OFF,    binaryF), //in derived mode the ulp error is calculated as exp2(y*log2(x)) and in non-derived it is the same as half_pow
+                                    ENTRY( pown,                 16.0f,        16.0f,         FTZ_OFF,     binaryF_i),
+                                    ENTRY( powr,                 16.0f,        16.0f,         FTZ_OFF,     binaryF),
+//                                  ENTRY( reciprocal,            1.0f,         1.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( remainder,             0.0f,         0.0f,         FTZ_OFF,     binaryF),
+                                    ENTRY( remquo,                0.0f,         0.0f,         FTZ_OFF,     binaryF_two_results_i),
+                                    ENTRY( rint,                  0.0f,         0.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( rootn,                16.0f,        16.0f,         FTZ_OFF,     binaryF_i),
+                                    ENTRY( round,                 0.0f,         0.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( rsqrt,                 2.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( signbit,               0.0f,         0.0f,         FTZ_OFF,     macro_unaryF),
+                                    ENTRY_EXT( sin,               4.0f,         4.0f,  0.00048828125f,     FTZ_OFF,    unaryF), //relaxed ulp 2^-11
+                                    ENTRY_EXT( sincos,            4.0f,         4.0f,  0.00048828125f,     FTZ_OFF,    unaryF_two_results), //relaxed ulp 2^-11
+                                    ENTRY( sinh,                  4.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( sinpi,                 4.0f,         4.0f,         FTZ_OFF,     unaryF),
+                                    { "sqrt", "sqrt",     {(void*)reference_sqrt}, {(void*)reference_sqrtl}, {NULL}, 3.0f, 0.0f,    4.0f, INFINITY, FTZ_OFF, RELAXED_OFF, unaryF },
+                                    { "sqrt_cr", "sqrt",  {(void*)reference_sqrt}, {(void*)reference_sqrtl}, {NULL}, 0.0f, 0.0f,    0.0f, INFINITY, FTZ_OFF, RELAXED_OFF, unaryF },
+                                    ENTRY_EXT( tan,               5.0f,         5.0f,         8192.0f,    FTZ_OFF,     unaryF), //in derived mode it the ulp error is calculated as sin/cos and in non-derived mode it is the same as half_tan.
+                                    ENTRY( tanh,                  5.0f,         5.0f,         FTZ_OFF,     unaryF),
+                                    ENTRY( tanpi,                 6.0f,         6.0f,         FTZ_OFF,     unaryF),
+//                                    ENTRY( tgamma,                 16.0f,         16.0f,         FTZ_OFF,     unaryF), // Commented this out until we can be sure this requirement is realistic
+                                    ENTRY( trunc,                 0.0f,         0.0f,         FTZ_OFF,     unaryF),
+                                    HALF_ENTRY( cos,           8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( divide,        8192.0f,      8192.0f,          FTZ_ON,     binaryF),
+                                    HALF_ENTRY( exp,           8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( exp2,          8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( exp10,         8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( log,           8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( log2,          8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( log10,         8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( powr,          8192.0f,      8192.0f,          FTZ_ON,     binaryF),
+                                    HALF_ENTRY( recip,         8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( rsqrt,         8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( sin,           8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( sqrt,          8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    HALF_ENTRY( tan,           8192.0f,      8192.0f,          FTZ_ON,     unaryF),
+                                    // basic operations
+                                    OPERATOR_ENTRY( add, "+",         0.0f,         0.0f,     FTZ_OFF,     binaryOperatorF),
+                                    OPERATOR_ENTRY( subtract, "-",     0.0f,         0.0f,     FTZ_OFF,     binaryOperatorF),
+                                    { "divide", "/",  {(void*)reference_divide}, {(void*)reference_dividel}, {(void*)reference_relaxed_divide}, 2.5f, 0.0f,         3.0f, 2.5f, FTZ_OFF, RELAXED_ON, binaryOperatorF },
+                                    { "divide_cr", "/",  {(void*)reference_divide}, {(void*)reference_dividel}, {(void*)reference_relaxed_divide}, 0.0f, 0.0f,         0.0f, 0.f, FTZ_OFF, RELAXED_OFF, binaryOperatorF },
+                                    OPERATOR_ENTRY( multiply, "*",     0.0f,         0.0f,     FTZ_OFF,     binaryOperatorF),
+                                    OPERATOR_ENTRY( assignment, "", 0.0f,       0.0f,     FTZ_OFF,     unaryF),        // A simple copy operation
+                                    OPERATOR_ENTRY( not, "!",       0.0f,       0.0f,   FTZ_OFF,    macro_unaryF),
+                                };
+const size_t functionListCount = sizeof( functionList ) / sizeof( functionList[0] );
diff --git a/test_conformance/math_brute_force/FunctionList.h b/test_conformance/math_brute_force/FunctionList.h
new file mode 100644
index 0000000..b2b0ec0
--- /dev/null
+++ b/test_conformance/math_brute_force/FunctionList.h
@@ -0,0 +1,98 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "harness/compat.h"
+#ifndef WIN32
+#include <unistd.h>
+#if defined( __APPLE__ )
+    #include <OpenCL/opencl.h>
+    #include <CL/cl.h>
+#include "harness/mt19937.h"
+typedef union fptr
+    void    *p;
+    double  (*f_f)(double);
+    double  (*f_u)(cl_uint);
+    int     (*i_f)(double);
+    int     (*i_f_f)(float);
+    float   (*f_ff_f)(float, float);
+    double  (*f_ff)(double, double);
+    int     (*i_ff)(double, double);
+    double  (*f_fi)(double, int);
+    double  (*f_fpf)(double, double*);
+    double  (*f_fpI)(double, int*);
+    double  (*f_ffpI)(double, double, int*);
+    double  (*f_fff)(double, double, double );
+    float   (*f_fma)(float, float, float, int);
+typedef union dptr
+    void            *p;
+    long double     (*f_f)(long double);
+    long double     (*f_u)(cl_ulong);
+    int             (*i_f)(long double);
+    long double     (*f_ff)(long double, long double);
+    int             (*i_ff)(long double, long double);
+    long double     (*f_fi)(long double, int);
+    long double     (*f_fpf)(long double, long double*);
+    long double     (*f_fpI)(long double, int*);
+    long double     (*f_ffpI)(long double, long double, int*);
+    long double     (*f_fff)(long double, long double, long double);
+struct Func;
+typedef struct vtbl
+    const char  *type_name;
+    int         (*TestFunc)( const struct Func *, MTdata );
+    int         (*DoubleTestFunc)( const struct Func *, MTdata);        // may be NULL if function is single precision only
+typedef struct Func
+  const char      *name;              // common name, to be used as an argument in the shell
+  const char      *nameInCode;        // name as it appears in the __kernel, usually the same as name, but different for multiplication
+  fptr            func;
+  dptr            dfunc;
+  fptr            rfunc;
+  float           float_ulps;
+  float           double_ulps;
+  float           float_embedded_ulps;
+  float           relaxed_error;
+  int             ftz;
+  int             relaxed;
+  const vtbl      *vtbl_ptr;
+extern const Func  functionList[];
+extern const size_t functionListCount;
diff --git a/test_conformance/math_brute_force/Sleep.cpp b/test_conformance/math_brute_force/Sleep.cpp
new file mode 100644
index 0000000..4d3b2c6
--- /dev/null
+++ b/test_conformance/math_brute_force/Sleep.cpp
@@ -0,0 +1,118 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "Sleep.h"
+#include "Utility.h"
+#if defined( __APPLE__ )
+    #include <IOKit/pwr_mgt/IOPMLib.h>
+    #include <IOKit/IOMessage.h>
+    struct
+    {
+        io_connect_t            connection;
+        IONotificationPortRef    port;
+        io_object_t                iterator;
+    }sleepInfo;
+    void sleepCallback(    void *            refcon,
+                        io_service_t        service,
+                        natural_t        messageType,
+                        void *            messageArgument );
+    void sleepCallback(    void *            refcon UNUSED,
+                        io_service_t        service UNUSED,
+                        natural_t        messageType,
+                        void *            messageArgument )
+    {
+        IOReturn result;
+    /*
+    service -- The IOService whose state has changed.
+    messageType -- A messageType enum, defined by IOKit/IOMessage.h or by the IOService's family.
+    messageArgument -- An argument for the message, dependent on the messageType.
+    */
+        switch ( messageType )
+        {
+            case kIOMessageSystemWillSleep:
+                // Handle demand sleep (such as sleep caused by running out of
+                // batteries, closing the lid of a laptop, or selecting
+                // sleep from the Apple menu).
+                IOAllowPowerChange(sleepInfo.connection,(long)messageArgument);
+                vlog( "Hard sleep occurred.\n" );
+                break;
+            case kIOMessageCanSystemSleep:
+                // In this case, the computer has been idle for several minutes
+                // and will sleep soon so you must either allow or cancel
+                // this notification. Important: if you don’t respond, there will
+                // be a 30-second timeout before the computer sleeps.
+                // IOCancelPowerChange(root_port,(long)messageArgument);
+                result = IOCancelPowerChange(sleepInfo.connection,(long)messageArgument);
+                if( kIOReturnSuccess != result )
+                    vlog( "sleep prevention failed. (%d)\n", result);
+            break;
+            case kIOMessageSystemHasPoweredOn:
+                // Handle wakeup.
+                break;
+        }
+    }
+void PreventSleep( void )
+#if defined( __APPLE__ )
+    vlog( "Disabling sleep... " );
+    sleepInfo.iterator = (io_object_t) 0;
+    sleepInfo.port = NULL;
+    sleepInfo.connection = IORegisterForSystemPower
+                            (
+                                &sleepInfo,                    //void * refcon,
+                                &sleepInfo.port,            //IONotificationPortRef * thePortRef,
+                                sleepCallback,                //IOServiceInterestCallback callback,
+                                &sleepInfo.iterator            //io_object_t * notifier
+                            );
+    if( (io_connect_t) 0 == sleepInfo.connection )
+        vlog( "failed.\n" );
+    else
+        vlog( "done.\n" );
+    CFRunLoopAddSource(CFRunLoopGetCurrent(),
+                        IONotificationPortGetRunLoopSource(sleepInfo.port),
+                        kCFRunLoopDefaultMode);
+    vlog( "*** PreventSleep() is not implemented on this platform.\n" );
+void ResumeSleep( void )
+#if defined( __APPLE__ )
+    IOReturn result = IODeregisterForSystemPower ( &sleepInfo.iterator );
+    if( 0 != result )
+        vlog( "Got error %d restoring sleep \n", result );
+    else
+        vlog( "Sleep restored.\n" );
+    vlog( "*** ResumeSleep() is not implemented on this platform.\n" );
diff --git a/test_conformance/math_brute_force/Sleep.h b/test_conformance/math_brute_force/Sleep.h
new file mode 100644
index 0000000..f983a32
--- /dev/null
+++ b/test_conformance/math_brute_force/Sleep.h
@@ -0,0 +1,24 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef SLEEP_H
+#define SLEEP_H
+void PreventSleep( void );
+void ResumeSleep( void );
+#endif /* SLEEP_H */
diff --git a/test_conformance/math_brute_force/Utility.cpp b/test_conformance/math_brute_force/Utility.cpp
new file mode 100644
index 0000000..3c706fa
--- /dev/null
+++ b/test_conformance/math_brute_force/Utility.cpp
@@ -0,0 +1,169 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "Utility.h"
+#if defined(__PPC__)
+// Global variable used to hold the FPU control register state. The FPSCR register cannot
+// be used because not all Power implementations retain or observe the NI (non-IEEE
+// mode) bit.
+__thread fpu_control_t fpu_control = 0;
+void MulD(double *rhi, double *rlo, double u, double v)
+    const double c = 134217729.0; // 1+2^27
+    double up, u1, u2, vp, v1, v2;
+    up = u*c;
+    u1 = (u - up) + up;
+    u2 = u - u1;
+    vp = v*c;
+    v1 = (v - vp) + vp;
+    v2 = v - v1;
+    double rh = u*v;
+    double rl = (((u1*v1 - rh) + (u1*v2)) + (u2*v1)) + (u2*v2);
+    *rhi = rh;
+    *rlo = rl;
+void AddD(double *rhi, double *rlo, double a, double b)
+    double zhi, zlo;
+    zhi = a + b;
+    if(fabs(a) > fabs(b)) {
+        zlo = zhi - a;
+        zlo = b - zlo;
+    }
+    else {
+        zlo = zhi - b;
+        zlo = a - zlo;
+    }
+    *rhi = zhi;
+    *rlo = zlo;
+void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
+    double mh, ml;
+    double c = 134217729.0;
+    double up, u1, u2, vp, v1, v2;
+    up = xh*c;
+    u1 = (xh - up) + up;
+    u2 = xh - u1;
+    vp = yh*c;
+    v1 = (yh - vp) + vp;
+    v2 = yh - v1;
+    mh = xh*yh;
+    ml = (((u1*v1 - mh) + (u1*v2)) + (u2*v1)) + (u2*v2);
+    ml += xh*yl + xl*yh;
+    *rhi = mh + ml;
+    *rlo = (mh - (*rhi)) + ml;
+void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
+    double r, s;
+    r = xh + yh;
+    s = (fabs(xh) > fabs(yh)) ? (xh - r + yh + yl + xl) : (yh - r + xh + xl + yl);
+    *rhi = r + s;
+    *rlo = (r - (*rhi)) + s;
+void DivideDD(double *chi, double *clo, double a, double b)
+    *chi = a / b;
+    double rhi, rlo;
+    MulD(&rhi, &rlo, *chi, b);
+    AddDD(&rhi, &rlo, -rhi, -rlo, a, 0.0);
+    *clo = rhi / b;
+// These functions compare two floats/doubles. Since some platforms may choose to
+// flush denormals to zero before comparison, a comparison like a < b may give the wrong
+// result in "certain cases" where we do need the correct comparison result when the operands
+// are denormals, so these functions compare floats/doubles using their signed integer/long int
+// representation. In other cases, when flushing to zero is fine, these should not be used.
+// Also, these don't check for NaNs and assume NaNs are handled separately as a special edge case
+// by the caller which calls these functions.
+// Return 0 if both are equal, 1 if x > y and -1 if x < y.
+int compareFloats(float x, float y)
+    int32f_t a, b;
+    a.f = x;
+    b.f = y;
+    if( a.i & 0x80000000 )
+        a.i = 0x80000000 - a.i;
+    if( b.i & 0x80000000 )
+        b.i = 0x80000000 - b.i;
+    if( a.i == b.i )
+        return 0;
+    return a.i < b.i ? -1 : 1;
+int compareDoubles(double x, double y)
+    int64d_t a, b;
+    a.d = x;
+    b.d = y;
+    if( a.l & 0x8000000000000000LL )
+        a.l = 0x8000000000000000LL - a.l;
+    if( b.l & 0x8000000000000000LL )
+        b.l = 0x8000000000000000LL - b.l;
+    if( a.l == b.l )
+        return 0;
+    return a.l < b.l ? -1 : 1;
+void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed)
+    char const *fpSizeStr = NULL;
+    char const *fpFastRelaxedStr = "";
+    switch (float_size) {
+    case sizeof(cl_double):
+        fpSizeStr = "fp64";
+        break;
+    case sizeof(cl_float):
+        fpSizeStr = "fp32";
+        break;
+    case sizeof(cl_half):
+        fpSizeStr = "fp16";
+        break;
+    }
+    if (isFastRelaxed) {
+        fpFastRelaxedStr = "rlx";
+    }
+    vlog("%15s %4s %4s",fname, fpSizeStr, fpFastRelaxedStr);
diff --git a/test_conformance/math_brute_force/Utility.h b/test_conformance/math_brute_force/Utility.h
new file mode 100644
index 0000000..9c14910
--- /dev/null
+++ b/test_conformance/math_brute_force/Utility.h
@@ -0,0 +1,254 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef UTILITY_H
+#define UTILITY_H
+#include "harness/compat.h"
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#include <CL/opencl.h>
+#include <stdio.h>
+#include "harness/rounding_mode.h"
+#include "harness/fpcontrol.h"
+#include "harness/testHarness.h"
+#include "harness/ThreadPool.h"
+#define BUFFER_SIZE         (1024*1024*2)
+#if defined( __GNUC__ )
+    #define UNUSED  __attribute__ ((unused))
+    #define UNUSED
+extern int gWimpyBufferSize;
+extern int gWimpyReductionFactor;
+#define VECTOR_SIZE_COUNT   6
+extern const char *sizeNames[VECTOR_SIZE_COUNT];
+extern const int   sizeValues[VECTOR_SIZE_COUNT];
+extern cl_device_id     gDevice;
+extern cl_context       gContext;
+extern cl_command_queue gQueue;
+extern void             *gIn;
+extern void             *gIn2;
+extern void             *gIn3;
+extern void             *gOut_Ref;
+extern void             *gOut_Ref2;
+extern void             *gOut[VECTOR_SIZE_COUNT];
+extern void             *gOut2[VECTOR_SIZE_COUNT];
+extern cl_mem           gInBuffer;
+extern cl_mem           gInBuffer2;
+extern cl_mem           gInBuffer3;
+extern cl_mem           gOutBuffer[VECTOR_SIZE_COUNT];
+extern cl_mem           gOutBuffer2[VECTOR_SIZE_COUNT];
+extern uint32_t         gComputeDevices;
+extern uint32_t         gSimdSize;
+extern int              gSkipCorrectnessTesting;
+extern int              gMeasureTimes;
+extern int              gReportAverageTimes;
+extern int              gForceFTZ;
+extern volatile int     gTestFastRelaxed;
+extern int              gFastRelaxedDerived;
+extern int              gWimpyMode;
+extern int              gHasDouble;
+extern int              gIsInRTZMode;
+extern int              gInfNanSupport;
+extern int              gIsEmbedded;
+extern int              gVerboseBruteForce;
+extern uint32_t         gMaxVectorSizeIndex;
+extern uint32_t         gMinVectorSizeIndex;
+extern uint32_t         gDeviceFrequency;
+extern cl_device_fp_config gFloatCapabilities;
+extern cl_device_fp_config gDoubleCapabilities;
+#define LOWER_IS_BETTER     0
+#define HIGHER_IS_BETTER    1
+#include "harness/errorHelpers.h"
+#if defined (_MSC_VER )
+    //Deal with missing scalbn on windows
+    #define scalbnf( _a, _i )       ldexpf( _a, _i )
+    #define scalbn( _a, _i )        ldexp( _a, _i )
+    #define scalbnl( _a, _i )       ldexpl( _a, _i )
+float Abs_Error( float test, double reference );
+float Ulp_Error( float test, double reference );
+//float Ulp_Error_Half( float test, double reference );
+float Bruteforce_Ulp_Error_Double( double test, long double reference );
+uint64_t GetTime( void );
+double SubtractTime( uint64_t endTime, uint64_t startTime );
+int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p );
+int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+// used to convert a bucket of bits into a search pattern through double
+static inline double DoubleFromUInt32( uint32_t bits );
+static inline double DoubleFromUInt32( uint32_t bits )
+    union{ uint64_t u; double d;} u;
+    // split 0x89abcdef to 0x89abc00000000def
+    u.u = bits & 0xfffU;
+    u.u |= (uint64_t) (bits & ~0xfffU) << 32;
+    // sign extend the leading bit of def segment as sign bit so that the middle region consists of either all 1s or 0s
+    u.u -= (bits & 0x800U) << 1;
+    // return result
+    return u.d;
+void _LogBuildError( cl_program p, int line, const char *file );
+#define LogBuildError( program )        _LogBuildError( program, __LINE__, __FILE__ )
+#define PERF_LOOP_COUNT 100
+// Note: though this takes a double, this is for use with single precision tests
+static inline int IsFloatSubnormal( double x )
+#if 2 == FLT_RADIX
+    // Do this in integer to avoid problems with FTZ behavior
+    union{ float d; uint32_t u;}u;
+    u.d = fabsf((float)x);
+    return (u.u-1) < 0x007fffffU;
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return fabs(x) < (double) FLT_MIN && x != 0.0;
+static inline int IsDoubleSubnormal( long double x )
+#if 2 == FLT_RADIX
+    // Do this in integer to avoid problems with FTZ behavior
+    union{ double d; uint64_t u;}u;
+    u.d = fabs((double) x);
+    return (u.u-1) < 0x000fffffffffffffULL;
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return fabs(x) < (double) DBL_MIN && x != 0.0;
+//The spec is fairly clear that we may enforce a hard cutoff to prevent premature flushing to zero.
+// However, to avoid conflict for 1.0, we are letting results at TYPE_MIN + ulp_limit to be flushed to zero.
+static inline int IsFloatResultSubnormal( double x, float ulps )
+    x = fabs(x) - MAKE_HEX_DOUBLE( 0x1.0p-149, 0x1, -149) * (double) ulps;
+    return x < MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 );
+static inline int IsFloatResultSubnormalAbsError( double x , float abs_err)
+  x = x - abs_err;
+  return x < MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 );
+static inline int IsDoubleResultSubnormal( long double x, float ulps )
+    x = fabsl(x) - MAKE_HEX_LONG( 0x1.0p-1074, 0x1, -1074) * (long double) ulps;
+    return x < MAKE_HEX_LONG( 0x1.0p-1022, 0x1, -1022 );
+static inline int IsFloatInfinity(double x)
+  union { cl_float d; cl_uint u; } u;
+  u.d = (cl_float) x;
+  return ((u.u & 0x7fffffffU) == 0x7F800000U);
+static inline int IsFloatMaxFloat(double x)
+  union { cl_float d; cl_uint u; } u;
+  u.d = (cl_float) x;
+  return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU);
+static inline int IsFloatNaN(double x)
+  union { cl_float d; cl_uint u; } u;
+  u.d = (cl_float) x;
+  return ((u.u & 0x7fffffffU) > 0x7F800000U);
+extern cl_uint RoundUpToNextPowerOfTwo( cl_uint x );
+// Windows (since long double got deprecated) sets the x87 to 53-bit precision
+// (that's x87 default state).  This causes problems with the tests that
+// convert long and ulong to float and double or otherwise deal with values
+// that need more precision than 53-bit. So, set the x87 to 64-bit precision.
+static inline void Force64BitFPUPrecision(void)
+#if __MINGW32__
+    // The usual method is to use _controlfp as follows:
+    //     #include <float.h>
+    //     _controlfp(_PC_64, _MCW_PC);
+    //
+    // _controlfp is available on MinGW32 but not on MinGW64. Instead of having
+    // divergent code just use inline assembly which works for both.
+    unsigned short int orig_cw = 0;
+    unsigned short int new_cw = 0;
+    __asm__ __volatile__ ("fstcw %0":"=m" (orig_cw));
+    new_cw = orig_cw | 0x0300;   // set precision to 64-bit
+    __asm__ __volatile__ ("fldcw  %0"::"m" (new_cw));
+#elif defined( _WIN32 ) && defined( __INTEL_COMPILER )
+    // Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not* work on win.x64:
+    // > On the x64 architecture, changing the floating point precision is not supported.
+    // (Taken from
+    int cw;
+    __asm { fnstcw cw };    // Get current value of FPU control word.
+    cw = cw & 0xfffffcff | ( 3 << 8 ); // Set Precision Control to Double Extended Precision.
+    __asm { fldcw cw };     // Set new value of FPU control word.
+    /* Implement for other platforms if needed */
+void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
+typedef union
+    int32_t i;
+    float   f;
+typedef union
+    int64_t l;
+    double  d;
+void MulD(double *rhi, double *rlo, double u, double v);
+void AddD(double *rhi, double *rlo, double a, double b);
+void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
+void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
+void DivideDD(double *chi, double *clo, double a, double b);
+int compareFloats(float x, float y);
+int compareDoubles(double x, double y);
+void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed);
+#endif /* UTILITY_H */
diff --git a/test_conformance/math_brute_force/binary.cpp b/test_conformance/math_brute_force/binary.cpp
new file mode 100644
index 0000000..4155a41
--- /dev/null
+++ b/test_conformance/math_brute_force/binary.cpp
@@ -0,0 +1,1556 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "Utility.h"
+#include <string.h>
+#include "FunctionList.h"
+int TestFunc_Float_Float_Float(const Func *f, MTdata);
+int TestFunc_Double_Double_Double(const Func *f, MTdata);
+int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata);
+int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata);
+int TestFunc_Float_Float_Float_common(const Func *f, MTdata, int isNextafter);
+int TestFunc_Double_Double_Double_common(const Func *f, MTdata, int isNextafter);
+const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
+const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022);
+extern const vtbl _binary = { "binary", TestFunc_Float_Float_Float,
+                              TestFunc_Double_Double_Double };
+extern const vtbl _binary_nextafter = {
+    "binary_nextafter", TestFunc_Float_Float_Float_nextafter,
+    TestFunc_Double_Double_Double_nextafter
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+    const char *c[] = {     "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2 )\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in1[i], in2[i] );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global float* in2)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       float3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
+                            "       f0 = ", name, "( f0, f1 );\n"
+                            "       vstore3( f0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       float3 f0, f1;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+                            "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       f0 = ", name, "( f0, f1 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = f0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = f0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+    const char *c[] = {     "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in1[i], in2[i] );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       double3 d0 = vload3( 0, in + 3 * i );\n"
+                            "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
+                            "       d0 = ", name, "( d0, d1 );\n"
+                            "       vstore3( d0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       double3 d0, d1;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
+                            "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       d0 = ", name, "( d0, d1 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = d0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = d0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+// A table of more difficult cases to get right
+static const float specialValuesFloat[] = {
+    -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),  MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f,  -4.0f, -3.5f,
+    -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),  MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
+    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f,
+    +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
+    +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
+    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
+static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] );
+typedef struct BuildKernelInfo
+    cl_uint     offset;            // the first vector size to build
+    cl_uint     kernel_count;
+    cl_kernel   **kernels;
+    cl_program  *programs;
+    const char  *nameInCode;
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i );
+//Thread specific data for a worker thread
+typedef struct ThreadInfo
+    cl_mem      inBuf;                              // input buffer for the thread
+    cl_mem      inBuf2;                             // input buffer for the thread
+    cl_mem      outBuf[ VECTOR_SIZE_COUNT ];        // output buffers for the thread
+    float       maxError;                           // max error value. Init to 0.
+    double      maxErrorValue;                      // position of the max error value (param 1).  Init to 0.
+    double      maxErrorValue2;                     // position of the max error value (param 2).  Init to 0.
+    MTdata      d;
+    cl_command_queue tQueue;                        // per thread command queue to improve performance
+typedef struct TestInfo
+    size_t      subBufferSize;                      // Size of the sub-buffer in elements
+    const Func  *f;                                 // A pointer to the function info
+    cl_program  programs[ VECTOR_SIZE_COUNT ];      // programs for various vector sizes
+    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
+    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
+    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
+    cl_uint     step;                               // step between each chunk and the next.
+    cl_uint     scale;                              // stride between individual test values
+    float       ulps;                               // max_allowed ulps
+    int         ftz;                                // non-zero if running in flush to zero mode
+    int         isFDim;
+    int         skipNanInf;
+    int         isNextafter;
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );
+int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+    double      maxErrorVal2 = 0.0;
+    int         skipTestingRelaxed = 0;
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale = 1;
+    if (gWimpyMode){
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+    test_info.f = f;
+    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
+    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    test_info.isFDim = 0 == strcmp( "fdim", f->nameInCode );
+    test_info.skipNanInf = test_info.isFDim  && ! gInfNanSupport;
+    test_info.isNextafter = isNextafter;
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf2 )
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer[%d] for region {%zd, %zd}\n", (int) j, region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+    // Run the kernels
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
+        // Accumulate the arithmetic errors
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            if( test_info.tinfo[i].maxError > maxError )
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+            }
+        }
+        if( error )
+            goto exit;
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+        {
+            p[j] = (genrand_int32(d) & ~0x40000000) | 0x20000000;
+            p2[j] = 0x3fc00000;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 );
+    vlog( "\n" );
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata( test_info.tinfo[i].d );
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+        free( test_info.tinfo );
+    }
+    return error;
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data  )
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_float );
+    cl_uint     base = job_id * (cl_uint) job->step;
+    ThreadInfo  *tinfo = job->tinfo + thread_id;
+    float       ulps = job->ulps;
+    fptr        func = job->f->func;
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    cl_uchar    *overflow = (cl_uchar*)malloc(buffer_size);
+    const char  *name = job->f->name;
+    int         isFDim = job->isFDim;
+    int         skipNanInf = job->skipNanInf;
+    int         isNextafter = job->isNextafter;
+    cl_uint     *t = 0;
+    float       *r=0,*s=0,*s2=0;
+    cl_int copysign_test = 0;
+    RoundingMode oldRoundMode;
+    int skipVerification = 0;
+    if(gTestFastRelaxed)
+    {
+      if (strcmp(name,"pow")==0 && gFastRelaxedDerived)
+      {
+        func = job->f->rfunc;
+        ulps = INFINITY;
+        skipVerification = 1;
+      }else
+      {
+        func = job->f->rfunc;
+        ulps = job->f->relaxed_error;
+      }
+    }
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_uint  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_uint*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+    //Init input array
+    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
+    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    if (job_id <= (cl_uint)indx)
+    { // test edge cases
+        float *fp = (float *)p;
+        float *fp2 = (float *)p2;
+        uint32_t x, y;
+    x = (job_id * buffer_elements) % specialValuesFloatCount;
+    y = (job_id * buffer_elements) / specialValuesFloatCount;
+        for( ; j < buffer_elements; j++ )
+        {
+            fp[j] = specialValuesFloat[x];
+            fp2[j] = specialValuesFloat[y];
+            if( ++x >= specialValuesFloatCount )
+            {
+                x = 0;
+                y++;
+                if( y >= specialValuesFloatCount )
+                    break;
+            }
+        }
+    }
+    //Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        p[j] = genrand_int32(d);
+        p2[j] = genrand_int32(d);
+    }
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error );
+            goto exit;
+        }
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+    if( gSkipCorrectnessTesting )
+    {
+        if( (error = clFinish(tinfo->tQueue)) )
+        {
+          vlog_error( "Error: clFinish failed! err: %d\n", error );
+          goto exit;
+        }
+        free(overflow);
+        return CL_SUCCESS;
+    }
+    FPU_mode_type oldMode;
+    oldRoundMode = kRoundToNearestEven;
+    if( isFDim )
+    {
+        //Calculate the correctly rounded reference result
+        memset( &oldMode, 0, sizeof( oldMode ) );
+        if( ftz )
+            ForceFTZ( &oldMode );
+        // Set the rounding mode to match the device
+        if (gIsInRTZMode)
+            oldRoundMode = set_round(kRoundTowardZero, kfloat);
+    }
+    if(!strcmp(name, "copysign"))
+        copysign_test = 1;
+#define ref_func(s, s2) (copysign_test ? func.f_ff_f( s, s2 ) : func.f_ff( s, s2 ))
+    //Calculate the correctly rounded reference result
+    r = (float *)gOut_Ref  + thread_id * buffer_elements;
+    s = (float *)gIn  + thread_id * buffer_elements;
+    s2 = (float *)gIn2  + thread_id * buffer_elements;
+    if( skipNanInf )
+    {
+        for( j = 0; j < buffer_elements; j++ )
+        {
+            feclearexcept(FE_OVERFLOW);
+            r[j] = (float) ref_func( s[j], s2[j] );
+            overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+        }
+    }
+    else
+    {
+        for( j = 0; j < buffer_elements; j++ )
+            r[j] = (float) ref_func( s[j], s2[j] );
+    }
+    if( isFDim && ftz )
+        RestoreFPState( &oldMode );
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in-order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_uint*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+    // Wait for the last buffer
+    out[j] = (cl_uint*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        goto exit;
+    }
+    if (!skipVerification) {
+        //Verify data
+        t = (cl_uint *)r;
+        for( j = 0; j < buffer_elements; j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                cl_uint *q = out[k];
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] )
+                {
+                    float test = ((float*) q)[j];
+                    double correct = ref_func( s[j], s2[j] );
+                    // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                    // As per OpenCL 2.0 spec, section, enabling fast-relaxed-math mode also enables
+                    // -cl-finite-math-only optimization. This optimization allows to assume that arguments and
+                    // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs.
+                    if ( gTestFastRelaxed || skipNanInf)
+                    {
+                        if( skipNanInf && overflow[j])
+                            continue;
+                        // Note: no double rounding here.  Reference functions calculate in single precision.
+                        if( IsFloatInfinity(correct) || IsFloatNaN(correct)     ||
+                            IsFloatInfinity(s2[j])   || IsFloatNaN(s2[j])       ||
+                            IsFloatInfinity(s[j])    || IsFloatNaN(s[j])        )
+                            continue;
+                    }
+                    float err = Ulp_Error( test, correct );
+                    int fail = ! (fabsf(err) <= ulps);
+                    if( fail && ftz )
+                    {
+                        // retry per section
+                        if( IsFloatResultSubnormal(correct, ulps ) )
+                        {
+                            fail = fail && ( test != 0.0f );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                        // nextafter on FTZ platforms may return the smallest
+                        // normal float (2^-126) given a denormal or a zero
+                        // as the first argument. The rationale here is that
+                        // nextafter flushes the argument to zero and then
+                        // returns the next representable number in the
+                        // direction of the second argument, and since
+                        // denorms are considered as zero, the smallest
+                        // normal number is the next representable number.
+                        // In which case, it should have the same sign as the
+                        // second argument.
+                        if (isNextafter )
+                        {
+                            if(IsFloatSubnormal(s[j]) || s[j] == 0.0f)
+                            {
+                                float value = copysignf(twoToMinus126, s2[j]);
+                                fail = fail && (test != value);
+                                if (!fail)
+                                    err = 0.0f;
+                            }
+                        }
+                        else
+                        {
+                            // retry per section
+                            if( IsFloatSubnormal( s[j] ) )
+                            {
+                                double correct2, correct3;
+                                float err2, err3;
+                                if( skipNanInf )
+                                    feclearexcept(FE_OVERFLOW);
+                                correct2 = ref_func( 0.0, s2[j] );
+                                correct3 = ref_func( -0.0, s2[j] );
+                                // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                                // As per OpenCL 2.0 spec, section, enabling fast-relaxed-math mode also enables
+                                // -cl-finite-math-only optimization. This optimization allows to assume that arguments and
+                                // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs.
+                                if( gTestFastRelaxed || skipNanInf )
+                                {
+                                    if( fetestexcept(FE_OVERFLOW) && skipNanInf )
+                                        continue;
+                                    // Note: no double rounding here.  Reference functions calculate in single precision.
+                                    if( IsFloatInfinity(correct2) || IsFloatNaN(correct2)   ||
+                                        IsFloatInfinity(correct3) || IsFloatNaN(correct3)    )
+                                        continue;
+                                }
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                // retry per section
+                                if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                                //try with both args as zero
+                                if( IsFloatSubnormal( s2[j] )  )
+                                {
+                                    double correct4, correct5;
+                                    float err4, err5;
+                                    if( skipNanInf )
+                                        feclearexcept(FE_OVERFLOW);
+                                    correct2 = ref_func( 0.0, 0.0 );
+                                    correct3 = ref_func( -0.0, 0.0 );
+                                    correct4 = ref_func( 0.0, -0.0 );
+                                    correct5 = ref_func( -0.0, -0.0 );
+                                    // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                                    // As per OpenCL 2.0 spec, section, enabling fast-relaxed-math mode also enables
+                                    // -cl-finite-math-only optimization. This optimization allows to assume that arguments and
+                                    // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs.
+                                    if( gTestFastRelaxed || skipNanInf )
+                                    {
+                                        if( fetestexcept(FE_OVERFLOW) && skipNanInf )
+                                            continue;
+                                        // Note: no double rounding here.  Reference functions calculate in single precision.
+                                        if( IsFloatInfinity(correct2) || IsFloatNaN(correct2)   ||
+                                            IsFloatInfinity(correct3) || IsFloatNaN(correct3)   ||
+                                            IsFloatInfinity(correct4) || IsFloatNaN(correct4)   ||
+                                            IsFloatInfinity(correct5) || IsFloatNaN(correct5)    )
+                                            continue;
+                                    }
+                                    err2 = Ulp_Error( test, correct2  );
+                                    err3 = Ulp_Error( test, correct3  );
+                                    err4 = Ulp_Error( test, correct4  );
+                                    err5 = Ulp_Error( test, correct5  );
+                                    fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
+                                                     (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
+                                    if( fabsf( err2 ) < fabsf(err ) )
+                                        err = err2;
+                                    if( fabsf( err3 ) < fabsf(err ) )
+                                        err = err3;
+                                    if( fabsf( err4 ) < fabsf(err ) )
+                                        err = err4;
+                                    if( fabsf( err5 ) < fabsf(err ) )
+                                        err = err5;
+                                    // retry per section
+                                    if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) ||
+                                        IsFloatResultSubnormal( correct4, ulps ) || IsFloatResultSubnormal( correct5, ulps ) )
+                                    {
+                                        fail = fail && ( test != 0.0f);
+                                        if( ! fail )
+                                            err = 0.0f;
+                                    }
+                                }
+                            }
+                            else if(IsFloatSubnormal(s2[j]) )
+                            {
+                                double correct2, correct3;
+                                float err2, err3;
+                                if( skipNanInf )
+                                    feclearexcept(FE_OVERFLOW);
+                                correct2 = ref_func( s[j], 0.0 );
+                                correct3 = ref_func( s[j], -0.0 );
+                                // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                                // As per OpenCL 2.0 spec, section, enabling fast-relaxed-math mode also enables
+                                // -cl-finite-math-only optimization. This optimization allows to assume that arguments and
+                                // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs.
+                                if ( gTestFastRelaxed || skipNanInf )
+                                {
+                                    // Note: no double rounding here.  Reference functions calculate in single precision.
+                                    if( overflow[j] && skipNanInf)
+                                        continue;
+                                    if( IsFloatInfinity(correct2) || IsFloatNaN(correct2)   ||
+                                        IsFloatInfinity(correct3) || IsFloatNaN(correct3)    )
+                                        continue;
+                                }
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                // retry per section
+                                if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > tinfo->maxError )
+                    {
+                        tinfo->maxError = fabsf(err);
+                        tinfo->maxErrorValue = s[j];
+                        tinfo->maxErrorValue2 = s2[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %s%s: %f ulp error at {%a (0x%x), %a (0x%x)}: *%a vs. %a (0x%8.8x) at index: %d\n", name, sizeNames[k], err, s[j], ((cl_uint*)s)[j], s2[j], ((cl_uint*)s2)[j], r[j], test, ((cl_uint*)&test)[0], j );
+                        error = -1;
+                        goto exit;
+                    }
+                }
+            }
+        }
+    }
+    if (isFDim && gIsInRTZMode)
+        (void)set_round(oldRoundMode, kfloat);
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements,  job->ulps, job->threadCount);
+        } else
+        {
+            vlog("." );
+        }
+        fflush(stdout);
+    }
+    if( overflow )
+        free( overflow );
+    return error;
+// A table of more difficult cases to get right.
+//
+// The first half lists negative values (NaN, infinity, DBL_MAX, values around
+// powers of two and small integers, values just above/below 1, 0.5 and 0.25,
+// the smallest normals, a selection of subnormals and -0.0). The second half
+// mirrors the first half with the sign flipped, so every magnitude is
+// exercised with both signs.
+static const double specialValuesDouble[] = {
+    -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100.,  -4.0, -3.5,
+    -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
+    +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100.,  +4.0, +3.5,
+    // The entry just above +1.0 must be positive here; it mirrors the
+    // -0x1.0000000000001p0 entry of the negative half.
+    +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
+};
+// Number of entries in specialValuesDouble.
+static const size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] );
+// Per-job worker run through ThreadPool_Do; defined after the driver below.
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p );
+// Driver for testing a double-precision function of two double arguments.
+//
+// Builds one kernel per vector size per worker thread, carves the global
+// input/output buffers into per-thread sub-buffers, then runs TestDouble over
+// test_info.jobCount jobs through ThreadPool_Do. When gMeasureTimes is set it
+// additionally times each vector size on gQueue. All resources created here
+// are released before returning, on both success and failure paths.
+//
+//   f            function descriptor; name, nameInCode, double_ulps and ftz
+//                are read from it.
+//   d            random number generator used to seed the per-thread
+//                generators and to fill the timing inputs.
+//   isNextafter  stored in test_info.isNextafter for use by the worker.
+//
+// Returns the last OpenCL / test error code recorded in `error`
+// (zero when every step succeeded).
+int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafter)
+{
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+    double      maxErrorVal2 = 0.0;
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale = 1;
+    if (gWimpyMode){
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+    test_info.f = f;
+    test_info.ulps = f->double_ulps;
+    test_info.ftz = f->ftz || gForceFTZ;
+    test_info.isFDim = 0 == strcmp( "fdim", f->nameInCode );
+    test_info.skipNanInf = 0;
+    test_info.isNextafter = isNextafter;
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    // Give every worker thread its own slice of the shared buffers, its own
+    // command queue and its own random number generator.
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zu, %zu}\n", region.origin, region.size );
+            goto exit;
+        }
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        // Check the buffer that was just created (inBuf2), not inBuf.
+        if( error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zu, %zu}\n", region.origin, region.size );
+            goto exit;
+        }
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zu, %zu}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
+        // Accumulate the arithmetic errors
+        // (done before checking `error` so the worst case seen so far is
+        // collected even when the run failed)
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            if( test_info.tinfo[i].maxError > maxError )
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+            }
+        }
+        if( error )
+            goto exit;
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        double *p = (double *)gIn;
+        double *p2 = (double *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( cl_double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = DoubleFromUInt32(genrand_int32(d));
+        }
+        // On failure go through the common cleanup instead of returning
+        // directly, so kernels, programs, sub-buffers and queues are released.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            // Report either the mean or the best of the PERF_LOOP_COUNT runs.
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        // j already equals gMaxVectorSizeIndex when the loop above finishes,
+        // so this padding loop emits nothing; it is kept as written.
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+    if( ! gSkipCorrectnessTesting )
+       vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 );
+    vlog( "\n" );
+exit:
+    // Release
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata( test_info.tinfo[i].d );
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+        free( test_info.tinfo );
+    }
+    return error;
+}
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_double );
+    cl_uint     base = job_id * (cl_uint) job->step;
+    ThreadInfo  *tinfo = job->tinfo + thread_id;
+    float       ulps = job->ulps;
+    dptr        func = job->f->dfunc;
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    const char  *name = job->f->name;
+    int         isNextafter = job->isNextafter;
+    cl_ulong    *t;
+    cl_double   *r,*s,*s2;
+    Force64BitFPUPrecision();
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_ulong  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+    //Init input array
+    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
+    cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    if( job_id <= (cl_uint)indx )
+    { // test edge cases
+        cl_double *fp = (cl_double *)p;
+        cl_double *fp2 = (cl_double *)p2;
+        uint32_t x, y;
+    x = (job_id * buffer_elements) % specialValuesDoubleCount;
+    y = (job_id * buffer_elements) / specialValuesDoubleCount;
+        for( ; j < buffer_elements; j++ )
+        {
+            fp[j] = specialValuesDouble[x];
+            fp2[j] = specialValuesDouble[y];
+            if( ++x >= specialValuesDoubleCount )
+            {
+                x = 0;
+                y++;
+                if( y >= specialValuesDoubleCount )
+                    break;
+            }
+        }
+    }
+    //Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        p[j] = genrand_int64(d);
+        p2[j] = genrand_int64(d);
+    }
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error );
+            goto exit;
+        }
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+    //Calculate the correctly rounded reference result
+    r = (cl_double *)gOut_Ref  + thread_id * buffer_elements;
+    s = (cl_double *)gIn  + thread_id * buffer_elements;
+    s2 = (cl_double *)gIn2  + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = (cl_double) func.f_ff( s[j], s2[j] );
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+    // Wait for the last buffer
+    out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        goto exit;
+    }
+    //Verify data
+    t = (cl_ulong *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+        {
+            cl_ulong *q = out[k];
+            // If we aren't getting the correctly rounded result
+            if( t[j] != q[j] )
+            {
+                cl_double test = ((cl_double*) q)[j];
+                long double correct = func.f_ff( s[j], s2[j] );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
+                int fail = ! (fabsf(err) <= ulps);
+                if( fail && ftz )
+                {
+                    // retry per section
+                    if( IsDoubleResultSubnormal(correct, ulps ) )
+                    {
+                        fail = fail && ( test != 0.0f );
+                        if( ! fail )
+                            err = 0.0f;
+                    }
+                    // nextafter on FTZ platforms may return the smallest
+                    // normal float (2^-126) given a denormal or a zero
+                    // as the first argument. The rationale here is that
+                    // nextafter flushes the argument to zero and then
+                    // returns the next representable number in the
+                    // direction of the second argument, and since
+                    // denorms are considered as zero, the smallest
+                    // normal number is the next representable number.
+                    // In which case, it should have the same sign as the
+                    // second argument.
+                    if (isNextafter )
+                    {
+                        if(IsDoubleSubnormal(s[j]) || s[j] == 0.0f)
+                        {
+                            cl_double value = copysign(twoToMinus1022, s2[j]);
+                            fail = fail && (test != value);
+                            if (!fail)
+                                err = 0.0f;
+                        }
+                    }
+                    else
+                    {
+                        // retry per section
+                        if( IsDoubleSubnormal( s[j] ) )
+                        {
+                            long double correct2 = func.f_ff( 0.0, s2[j] );
+                            long double correct3 = func.f_ff( -0.0, s2[j] );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                            //try with both args as zero
+                            if( IsDoubleSubnormal( s2[j] )  )
+                            {
+                                correct2 = func.f_ff( 0.0, 0.0 );
+                                correct3 = func.f_ff( -0.0, 0.0 );
+                                long double correct4 = func.f_ff( 0.0, -0.0 );
+                                long double correct5 = func.f_ff( -0.0, -0.0 );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
+                                                 (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                // retry per section
+                                if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) ||
+                                    IsDoubleResultSubnormal( correct4, ulps ) || IsDoubleResultSubnormal( correct5, ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if(IsDoubleSubnormal(s2[j]) )
+                        {
+                            long double correct2 = func.f_ff( s[j], 0.0 );
+                            long double correct3 = func.f_ff( s[j], -0.0 );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                }
+                if( fabsf(err ) > tinfo->maxError )
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                    tinfo->maxErrorValue2 = s2[j];
+                }
+                if( fail )
+                {
+                    vlog_error( "\nERROR: %s%s: %f ulp error at {%.13la, %.13la}: *%.13la vs. %.13la\n", name, sizeNames[k], err, s[j], s2[j], r[j], test );
+                    error = -1;
+                    goto exit;
+                }
+            }
+        }
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements,  job->ulps, job->threadCount);
+        } else
+        {
+            vlog("." );
+        }
+        fflush(stdout);
+    }
+    return error;
+int TestFunc_Float_Float_Float(const Func *f, MTdata d)  // float entry point: forwards f and d to the shared float implementation
+    return TestFunc_Float_Float_Float_common(f, d, 0);  // third argument 0 -- presumably the "is nextafter" flag, off (signature not visible here; confirm)
+int TestFunc_Double_Double_Double(const Func *f, MTdata d)  // double entry point: forwards f and d to the shared double implementation
+    return TestFunc_Double_Double_Double_common(f, d, 0);  // third argument 0 -- presumably nextafter handling off (confirm against _common)
+int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata d)  // float entry point used for nextafter
+    return TestFunc_Float_Float_Float_common(f, d, 1);  // third argument 1 -- presumably enables the nextafter-specific checks (confirm)
+int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata d)  // double entry point used for nextafter
+    return TestFunc_Double_Double_Double_common(f, d, 1);  // third argument 1 -- presumably enables the nextafter-specific checks (confirm)
diff --git a/test_conformance/math_brute_force/binaryOperator.cpp b/test_conformance/math_brute_force/binaryOperator.cpp
new file mode 100644
index 0000000..7676625
--- /dev/null
+++ b/test_conformance/math_brute_force/binaryOperator.cpp
@@ -0,0 +1,1462 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "Utility.h"
+#include <string.h>
+#include "FunctionList.h"
+int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata);  // float test entry point; defined later in this file
+int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata);  // double test entry point; definition is outside this excerpt
+extern const vtbl _binary_operator = { "binaryOperator",  // table entry for binary-operator tests; vtbl is declared elsewhere -- presumably {type name, float test, double test}, confirm field order
+                                       TestFunc_Float_Float_Float_Operator,
+                                       TestFunc_Double_Double_Double_Operator };
+static int BuildKernel( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );  // forward declaration: float kernel builder
+static int BuildKernelDouble( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );  // forward declaration: double kernel builder
+static int BuildKernel( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )  // Assembles float kernel source for the vector size at index vectorSize and passes it, with kernel_count, k and p, to MakeKernels; returns MakeKernels' result
+    const char *c[] = {  // generic source fragments: each work-item computes out[i] = in1[i] <operator_symbol> in2[i] on floatN elements
+                            "__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2 )\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   out[i] =  in1[i] ", operator_symbol, " in2[i];\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global float* in2)\n"  // 3-wide variant on scalar pointers: all but the last work-item use vload3/vstore3; the last one loads the 1 or 2 leftover scalars (chosen by i & 1), pads with NAN and stores only those lanes
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       float3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
+                            "       f0 = f0 ", operator_symbol, " f1;\n"
+                            "       vstore3( f0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       float3 f0, f1;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+                            "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       f0 = f0 ", operator_symbol, " f1;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = f0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = f0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    const char **kern = c;  // default: the generic floatN source
+    size_t kernSize = sizeof(c)/sizeof(c[0]);  // number of string fragments, not bytes
+    if( sizeValues[vectorSize] == 3 )  // 3-element vectors need the vload3/vstore3 variant
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];  // holds "<name>_kernel<size suffix>", the same kernel name spelled in the source fragments above
+    snprintf( testName, sizeof( testName ) -1, "%s_kernel%s", name, sizeNames[vectorSize] );  // NOTE(review): snprintf already reserves room for the terminator, so the -1 only shortens the usable length by one; long names are silently truncated
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);  // MakeKernels is defined elsewhere; presumably fills k and *p -- confirm
+static int BuildKernelDouble( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )  // Double-precision counterpart of BuildKernel: assembles double kernel source for the vector size at index vectorSize and passes it to MakeKernels; returns MakeKernels' result
+    const char *c[] = {  // generic source fragments: each work-item computes out[i] = in1[i] <operator_symbol> in2[i] on doubleN elements
+                            "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                            "__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   out[i] =  in1[i] ", operator_symbol, " in2[i];\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"  // 3-wide variant on scalar pointers: all but the last work-item use vload3/vstore3; the last one loads the 1 or 2 leftover scalars (chosen by i & 1), pads with NAN and stores only those lanes
+                            "__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       double3 d0 = vload3( 0, in + 3 * i );\n"
+                            "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
+                            "       d0 = d0 ", operator_symbol, " d1;\n"
+                            "       vstore3( d0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       double3 d0, d1;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
+                            "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       d0 = d0 ", operator_symbol, " d1;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = d0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = d0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    const char **kern = c;  // default: the generic doubleN source
+    size_t kernSize = sizeof(c)/sizeof(c[0]);  // number of string fragments, not bytes
+    if( sizeValues[vectorSize] == 3 )  // 3-element vectors need the vload3/vstore3 variant
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];  // holds "<name>_kernel<size suffix>", the same kernel name spelled in the source fragments above
+    snprintf( testName, sizeof( testName ) -1, "%s_kernel%s", name, sizeNames[vectorSize] );  // NOTE(review): snprintf already reserves room for the terminator, so the -1 only shortens the usable length by one; long names are silently truncated
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);  // MakeKernels is defined elsewhere; presumably fills k and *p -- confirm
+typedef struct BuildKernelInfo  // argument bundle read by the BuildKernel_FloatFn / BuildKernel_DoubleFn job callbacks
+    cl_uint     offset;            // the first vector size to build (added to job_id to get the vector-size index)
+    cl_uint     kernel_count;      // forwarded unchanged as the kernel_count argument of BuildKernel / BuildKernelDouble
+    cl_kernel   **kernels;         // indexed by vector-size index; kernels[i] is passed as the kernel array for that size
+    cl_program  *programs;         // indexed by vector-size index; programs + i is passed as the program slot for that size
+    const char  *name;             // function name used to form the kernel name
+    const char  *operator_symbol;  // operator text spliced between the two operands in the generated source
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );  // forward declaration
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )  // ThreadPool_Do job callback: builds the float kernels for one vector size; thread_id is unused
+    BuildKernelInfo *info = (BuildKernelInfo*) p;  // p is the BuildKernelInfo handed to ThreadPool_Do
+    cl_uint i = info->offset + job_id;  // job_id counts from 0; offset shifts it to the vector-size index
+    return BuildKernel( info->name, info->operator_symbol, i, info->kernel_count, info->kernels[i], info->programs + i );  // BuildKernel's status is returned unchanged
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );  // forward declaration
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )  // same shape as BuildKernel_FloatFn, but builds the double kernels for one vector size; thread_id is unused
+    BuildKernelInfo *info = (BuildKernelInfo*) p;  // p is expected to point at a BuildKernelInfo
+    cl_uint i = info->offset + job_id;  // job_id counts from 0; offset shifts it to the vector-size index
+    return BuildKernelDouble( info->name, info->operator_symbol, i, info->kernel_count, info->kernels[i], info->programs + i );  // BuildKernelDouble's status is returned unchanged
+// Per-worker-thread state: its sub-buffers, command queue, random generator and the largest error it has seen
+typedef struct ThreadInfo
+    cl_mem      inBuf;                              // first-operand input sub-buffer for the thread (created from gInBuffer)
+    cl_mem      inBuf2;                             // second-operand input sub-buffer for the thread (created from gInBuffer2)
+    cl_mem      outBuf[ VECTOR_SIZE_COUNT ];        // output sub-buffers for the thread, one per vector size (created from gOutBuffer[j])
+    float       maxError;                           // max error value. Init to 0.
+    double      maxErrorValue;                      // position of the max error value (param 1).  Init to 0.
+    double      maxErrorValue2;                     // position of the max error value (param 2).  Init to 0.
+    MTdata      d;                                  // per-thread random generator state, created with init_genrand
+    cl_command_queue tQueue;                        // per thread command queue to improve performance
+typedef struct TestInfo  // state for one test run; a pointer to it is the argument given to ThreadPool_Do for the test jobs
+    size_t      subBufferSize;                      // Size of the sub-buffer in elements
+    const Func  *f;                                 // A pointer to the function info
+    cl_program  programs[ VECTOR_SIZE_COUNT ];      // programs for various vector sizes
+    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
+    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
+    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs: 2^32 / step, or 1 if computing step overflowed
+    cl_uint     step;                               // step between each chunk and the next (subBufferSize * scale)
+    cl_uint     scale;                              // stride between individual test values (1 unless gWimpyMode is set)
+    float       ulps;                               // max_allowed ulps
+    int         ftz;                                // non-zero if running in flush to zero mode
+    // no special fields
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );  // forward declaration of the float test job callback
+// A table of float inputs that are difficult to get right: NaN, infinities, FLT_MAX, values next to powers of two and half-integers, FLT_MIN, denormals and signed zeros
+static const float specialValuesFloat[] = {
+    -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),  MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f,  -4.0f, -3.5f,
+    -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),  MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
+    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f,
+    +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
+    +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
+    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
+static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] );  // number of entries in specialValuesFloat
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );  // repeats the prototype declared above the table
+int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+    double      maxErrorVal2 = 0.0;
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode) {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+    test_info.f = f;
+    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
+    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->name, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
+        // Accumulate the arithmetic errors
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            if( test_info.tinfo[i].maxError > maxError )
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+            }
+        }
+        if( error )
+            goto exit;
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+        {
+            p[j] = (genrand_int32(d) & ~0x40000000) | 0x20000000;
+            p2[j] = 0x3fc00000;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 );
+    vlog( "\n" );
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata(test_info.tinfo[i].d);
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+        free( test_info.tinfo );
+    }
+    return error;
+// Worker job for the brute-force test of a binary float operator: fills this thread's slice of the
+// host input arrays, runs the kernel for every vector size and compares the device results with the
+// host reference function.
+//   job_id    - index of this job; selects the window of special-value pairs and the progress output
+//   thread_id - index of the worker; selects the ThreadInfo, the kernel copy and the host buffer slice
+//   data      - pointer to the TestInfo shared by all jobs (read-only here)
+// Returns 0 on success, the failing OpenCL error code, CL_OUT_OF_HOST_MEMORY when the overflow flags
+// are needed but could not be allocated, or -1 when a result exceeds the allowed ulp error.
+// The overflow flag array is released on every return path.
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_float );
+    cl_uint     base = job_id * (cl_uint) job->step;
+    ThreadInfo  *tinfo = job->tinfo + thread_id;
+    float       ulps = job->ulps;
+    fptr        func = job->f->func;
+    if ( gTestFastRelaxed )
+    {
+      func = job->f->rfunc;
+    }
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    cl_uchar    *overflow = (cl_uchar*)malloc(buffer_size);
+    const char  *name = job->f->name;
+    cl_uint     *t;
+    cl_float    *r,*s,*s2;
+    RoundingMode oldRoundMode;
+    // The overflow flags are only read and written when !gInfNanSupport; fail cleanly instead of
+    // dereferencing NULL in that configuration.
+    if( NULL == overflow && !gInfNanSupport )
+    {
+        vlog_error( "Error: Unable to allocate storage for overflow flags!\n" );
+        return CL_OUT_OF_HOST_MEMORY;
+    }
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_uint  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            // Release the flags here: this path returns directly instead of using the cleanup label.
+            free( overflow );
+            return error;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+    //Init input array
+    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
+    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    // Every (x, y) pair of special values is enumerated once; job_id selects the window of pairs
+    // that falls into this job's buffer.
+    int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    if( job_id <= (cl_uint)indx ) {
+        // Insert special values
+        uint32_t x, y;
+        x = (job_id * buffer_elements) % specialValuesFloatCount;
+        y = (job_id * buffer_elements) / specialValuesFloatCount;
+        for( ; j < buffer_elements; j++ ) {
+            p[j] = ((cl_uint *)specialValuesFloat)[x];
+            p2[j] = ((cl_uint *)specialValuesFloat)[y];
+            ++x;
+            if (x >= specialValuesFloatCount) {
+                x = 0;
+                y++;
+                // NOTE(review): breaking here leaves j on the element just written, so the loop
+                // below overwrites the final special-value pair with random data -- confirm intent.
+                if (y >= specialValuesFloatCount)
+                    break;
+            }
+            if (gTestFastRelaxed && strcmp(name,"divide") == 0) {
+                cl_uint pj = p[j] & 0x7fffffff;
+                cl_uint p2j = p2[j] & 0x7fffffff;
+                // Replace values outside [2^-62, 2^62] with QNaN
+                if (pj < 0x20800000 || pj > 0x5e800000)
+                    p[j] = 0x7fc00000;
+                if (p2j < 0x20800000 || p2j > 0x5e800000)
+                    p2[j] = 0x7fc00000;
+            }
+        }
+    }
+    // Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        p[j] = genrand_int32(d);
+        p2[j] = genrand_int32(d);
+        if (gTestFastRelaxed && strcmp(name,"divide") == 0) {
+            cl_uint pj = p[j] & 0x7fffffff;
+            cl_uint p2j = p2[j] & 0x7fffffff;
+            // Replace values outside [2^-62, 2^62] with QNaN
+            if (pj < 0x20800000 || pj > 0x5e800000)
+                p[j] = 0x7fc00000;
+            if (p2j < 0x20800000 || p2j > 0x5e800000)
+                p2[j] = 0x7fc00000;
+        }
+    }
+    // Upload both operand arrays to this thread's input sub-buffers (non-blocking writes).
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject failed! err: %d\n", error );
+            goto exit;
+        }
+        // run the kernel
+        // One work-item per vector: buffer_elements / vector width, rounded up.
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); free( overflow ); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); free( overflow ); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); free( overflow ); return error; }
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+    if( gSkipCorrectnessTesting )
+    {
+        free( overflow );
+        return CL_SUCCESS;
+    }
+    //Calculate the correctly rounded reference result
+    FPU_mode_type oldMode;
+    memset( &oldMode, 0, sizeof( oldMode ) );
+    if( ftz )
+        ForceFTZ( &oldMode );
+    // Set the rounding mode to match the device
+    oldRoundMode = kRoundToNearestEven;
+    if (gIsInRTZMode)
+        oldRoundMode = set_round(kRoundTowardZero, kfloat);
+    //Calculate the correctly rounded reference result
+    r = (float *)gOut_Ref  + thread_id * buffer_elements;
+    s = (float *)gIn  + thread_id * buffer_elements;
+    s2 = (float *)gIn2  + thread_id * buffer_elements;
+    if( gInfNanSupport )
+    {
+        for( j = 0; j < buffer_elements; j++ )
+            r[j] = (float) func.f_ff( s[j], s2[j] );
+    }
+    else
+    {
+        // Without Inf/NaN support, record per element whether the reference computation overflowed.
+        for( j = 0; j < buffer_elements; j++ )
+        {
+            feclearexcept(FE_OVERFLOW);
+            r[j] = (float) func.f_ff( s[j], s2[j] );
+            overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+        }
+    }
+    // Restore the host floating-point environment changed above.
+    if (gIsInRTZMode)
+      (void)set_round(oldRoundMode, kfloat);
+    if( ftz )
+        RestoreFPState( &oldMode );
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+    // Wait for the last buffer
+    out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        goto exit;
+    }
+    //Verify data
+    t = (cl_uint *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+        {
+            cl_uint *q = out[k];
+            // If we aren't getting the correctly rounded result
+            if( t[j] != q[j] )
+            {
+                float test = ((float*) q)[j];
+                double correct = func.f_ff( s[j], s2[j] );
+                // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                if ( !gInfNanSupport)
+                {
+                    // Note: no double rounding here.  Reference functions calculate in single precision.
+                    if( overflow[j]                                         ||
+                        IsFloatInfinity(correct) || IsFloatNaN(correct)     ||
+                        IsFloatInfinity(s2[j])   || IsFloatNaN(s2[j])       ||
+                        IsFloatInfinity(s[j])    || IsFloatNaN(s[j])        )
+                        continue;
+                }
+        // Per section 10 paragraph 6, accept embedded devices always returning positive 0.0.
+        if (gIsEmbedded && (t[j] == 0x80000000) && (q[j] == 0x00000000)) continue;
+                // err is measured against the double reference, errB against it rounded to float;
+                // the result passes if either is within ulps.
+                float err = Ulp_Error( test, correct );
+                float errB = Ulp_Error( test, (float) correct  );
+                if( gTestFastRelaxed )
+                  ulps = job->f->relaxed_error;
+                int fail = ((!(fabsf(err) <= ulps)) && (!(fabsf(errB) <= ulps)));
+                if( fabsf( errB ) < fabsf(err ) )
+                  err = errB;
+                if( fail && ftz )
+                {
+                    // retry: a zero result is accepted when the reference result is subnormal
+                    if( IsFloatResultSubnormal(correct, ulps ) )
+                    {
+                        fail = fail && ( test != 0.0f );
+                        if( ! fail )
+                            err = 0.0f;
+                    }
+                    // retry: treat a subnormal first operand as +0.0 and -0.0
+                    if( IsFloatSubnormal( s[j] ) )
+                    {
+                        double correct2, correct3;
+                        float err2, err3;
+                        if( !gInfNanSupport )
+                            feclearexcept(FE_OVERFLOW);
+                        correct2 = func.f_ff( 0.0, s2[j] );
+                        correct3 = func.f_ff( -0.0, s2[j] );
+                        // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                        if( !gInfNanSupport )
+                        {
+                            if( fetestexcept(FE_OVERFLOW) )
+                                continue;
+                            // Note: no double rounding here.  Reference functions calculate in single precision.
+                            if( IsFloatInfinity(correct2) || IsFloatNaN(correct2)   ||
+                                IsFloatInfinity(correct3) || IsFloatNaN(correct3)    )
+                                continue;
+                        }
+                        err2 = Ulp_Error( test, correct2  );
+                        err3 = Ulp_Error( test, correct3  );
+                        fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                        if( fabsf( err2 ) < fabsf(err ) )
+                            err = err2;
+                        if( fabsf( err3 ) < fabsf(err ) )
+                            err = err3;
+                        // retry: a zero result is accepted when either alternative reference is subnormal
+                        if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
+                        {
+                            fail = fail && ( test != 0.0f);
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                        //try with both args as zero
+                        if( IsFloatSubnormal( s2[j] )  )
+                        {
+                            double correct4, correct5;
+                            float err4, err5;
+                            if( !gInfNanSupport )
+                                feclearexcept(FE_OVERFLOW);
+                            correct2 = func.f_ff( 0.0, 0.0 );
+                            correct3 = func.f_ff( -0.0, 0.0 );
+                            correct4 = func.f_ff( 0.0, -0.0 );
+                            correct5 = func.f_ff( -0.0, -0.0 );
+                            // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                            if( !gInfNanSupport )
+                            {
+                                if( fetestexcept(FE_OVERFLOW) )
+                                    continue;
+                                // Note: no double rounding here.  Reference functions calculate in single precision.
+                                if( IsFloatInfinity(correct2) || IsFloatNaN(correct2)   ||
+                                    IsFloatInfinity(correct3) || IsFloatNaN(correct3)   ||
+                                    IsFloatInfinity(correct4) || IsFloatNaN(correct4)   ||
+                                    IsFloatInfinity(correct5) || IsFloatNaN(correct5)    )
+                                    continue;
+                            }
+                            err2 = Ulp_Error( test, correct2  );
+                            err3 = Ulp_Error( test, correct3  );
+                            err4 = Ulp_Error( test, correct4  );
+                            err5 = Ulp_Error( test, correct5  );
+                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
+                                             (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            if( fabsf( err4 ) < fabsf(err ) )
+                                err = err4;
+                            if( fabsf( err5 ) < fabsf(err ) )
+                                err = err5;
+                            // retry: a zero result is accepted when any all-zero-operand reference is subnormal
+                            if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) ||
+                                IsFloatResultSubnormal( correct4, ulps ) || IsFloatResultSubnormal( correct5, ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                    else if(IsFloatSubnormal(s2[j]) )
+                    {
+                        // Only the second operand is subnormal: retry with it replaced by +0.0 and -0.0.
+                        double correct2, correct3;
+                        float err2, err3;
+                        if( !gInfNanSupport )
+                            feclearexcept(FE_OVERFLOW);
+                        correct2 = func.f_ff( s[j], 0.0 );
+                        correct3 = func.f_ff( s[j], -0.0 );
+                        // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                        // NOTE(review): unlike the s[j]-subnormal branch, this tests the saved overflow[j]
+                        // flag instead of fetestexcept() and does not examine correct3 -- confirm intent.
+                        if ( !gInfNanSupport)
+                        {
+                            // Note: no double rounding here.  Reference functions calculate in single precision.
+                            if( overflow[j]                                         ||
+                                IsFloatInfinity(correct) || IsFloatNaN(correct)     ||
+                                IsFloatInfinity(correct2)|| IsFloatNaN(correct2)    )
+                                continue;
+                        }
+                        err2 = Ulp_Error( test, correct2  );
+                        err3 = Ulp_Error( test, correct3  );
+                        fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                        if( fabsf( err2 ) < fabsf(err ) )
+                            err = err2;
+                        if( fabsf( err3 ) < fabsf(err ) )
+                            err = err3;
+                        // retry: a zero result is accepted when either alternative reference is subnormal
+                        if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
+                        {
+                            fail = fail && ( test != 0.0f);
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                    }
+                }
+                // Track the largest error seen by this thread together with the operands that caused it.
+                if( fabsf(err ) > tinfo->maxError )
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                    tinfo->maxErrorValue2 = s2[j];
+                }
+                if( fail )
+                {
+                    vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a}: *%a vs. %a (0x%8.8x) at index: %d\n", name, sizeNames[k], err, s[j], s2[j], r[j], test, ((cl_uint*)&test)[0], j );
+                    error = -1;
+                    goto exit;
+                }
+            }
+        }
+    }
+    // Hand the mapped result buffers back to the device.
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            free( overflow );
+            return error;
+        }
+    }
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+    // Progress output, emitted only for jobs whose base has its low 28 bits clear.
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+            // buffer_elements is a size_t and scale is widened explicitly so both match %zu.
+            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10zu ulps:%5.3f ThreadCount:%2u\n", base, job->step,  (size_t) job->scale, buffer_elements, job->ulps, job->threadCount);
+        } else
+        {
+            vlog("." );
+        }
+        fflush(stdout);
+    }
+    if( overflow )
+        free( overflow );
+    return error;
+// A table of more difficult cases to get right
+// The first half lists negative values; the second half mirrors it entry for entry with positive signs.
+static const double specialValuesDouble[] = {
+    -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100.,  -4.0, -3.5,
+    -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
+    +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100.,  +4.0, +3.5,
+    +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
+// Number of entries in specialValuesDouble.
+static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] );
+// Forward declaration of the per-job worker handed to ThreadPool_Do by the double-precision test driver.
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p );
+int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+    double      maxErrorVal2 = 0.0;
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode)
+    {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+    test_info.f = f;
+    test_info.ulps = f->double_ulps;
+    test_info.ftz = f->ftz || gForceFTZ;
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->name, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
+        // Accumulate the arithmetic errors
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            if( test_info.tinfo[i].maxError > maxError )
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+            }
+        }
+        if( error )
+            goto exit;
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        double *p = (double *)gIn;
+        double *p2 = (double *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( cl_double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = DoubleFromUInt32(genrand_int32(d));
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 );
+    vlog( "\n" );
+    // Release
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata(test_info.tinfo[i].d);
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+        free( test_info.tinfo );
+    }
+    return error;
+// Worker job for the double-precision, two-argument tests. It is dispatched
+// through ThreadPool_Do with the shared TestInfo passed as `data`.
+//
+//   job_id    -- index of the chunk being tested; multiplied by job->step to
+//                form `base`, and also decides whether the special-value table
+//                is used to seed the inputs.
+//   thread_id -- selects the ThreadInfo in job->tinfo and this thread's slice
+//                of gIn / gIn2 / gOut_Ref.
+//   data      -- pointer to the TestInfo describing the function under test.
+//
+// The job fills both input buffers (special-value pairs first, random 64-bit
+// patterns for whatever is left), runs the kernel for every vector size in
+// [gMinVectorSizeIndex, gMaxVectorSizeIndex) and compares each result with the
+// host reference func.f_ff, allowing job->ulps of error.
+//
+// Returns the OpenCL error code of the call that failed, -1 when a result is
+// outside the allowed error, and CL_SUCCESS when gSkipCorrectnessTesting is set.
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_double );
+    cl_uint     base = job_id * (cl_uint) job->step;
+    ThreadInfo  *tinfo = job->tinfo + thread_id;
+    float       ulps = job->ulps;
+    dptr        func = job->f->dfunc;
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    const char  *name = job->f->name;
+    cl_ulong    *t;
+    cl_double   *r,*s,*s2;
+    Force64BitFPUPrecision();
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_ulong  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+    //Init input array
+    // Each thread works on its own buffer_elements-sized slice of the shared host arrays.
+    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
+    cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    // Every (x, y) pair of special values is tested; indx is the last job that still has pairs left.
+    int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    if( job_id <= (cl_uint)indx )
+    { // test edge cases
+        cl_double *fp = (cl_double *)p;
+        cl_double *fp2 = (cl_double *)p2;
+        uint32_t x, y;
+        // Resume the pair enumeration where the previous job stopped.
+        x = (job_id * buffer_elements) % specialValuesDoubleCount;
+        y = (job_id * buffer_elements) / specialValuesDoubleCount;
+        for( ; j < buffer_elements; j++ )
+        {
+            fp[j] = specialValuesDouble[x];
+            fp2[j] = specialValuesDouble[y];
+            if( ++x >= specialValuesDoubleCount )
+            {
+                x = 0;
+                y++;
+                if( y >= specialValuesDoubleCount )
+                    break;
+            }
+        }
+    }
+    //Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        p[j] = genrand_int64(d);
+        p2[j] = genrand_int64(d);
+    }
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            // Name the call that actually failed (this is the unmap, not the map).
+            vlog_error( "Error: clEnqueueUnmapMemObject failed! err: %d\n", error );
+            goto exit;
+        }
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];    // buffer_elements / sizeValues[j]  rounded up
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+    //Calculate the correctly rounded reference result
+    r = (cl_double *)gOut_Ref  + thread_id * buffer_elements;
+    s = (cl_double *)gIn  + thread_id * buffer_elements;
+    s2 = (cl_double *)gIn2  + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = (cl_double) func.f_ff( s[j], s2[j] );
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+    // Wait for the last buffer
+    // (j is left at the last vector-size index by the loop above; this map is blocking.)
+    out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        goto exit;
+    }
+    //Verify data
+    // Compare bit patterns first; the ulp computation only runs for elements that differ.
+    t = (cl_ulong *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+        {
+            cl_ulong *q = out[k];
+            // If we aren't getting the correctly rounded result
+            if( t[j] != q[j] )
+            {
+                cl_double test = ((cl_double*) q)[j];
+                long double correct = func.f_ff( s[j], s2[j] );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
+                int fail = ! (fabsf(err) <= ulps);
+                // In flush-to-zero mode a failure gets a second chance: a zero result is
+                // accepted when the reference is subnormal, and subnormal inputs are
+                // re-evaluated as +0.0 / -0.0.
+                if( fail && ftz )
+                {
+                    // retry per section
+                    if( IsDoubleResultSubnormal(correct, ulps ) )
+                    {
+                        fail = fail && ( test != 0.0f );
+                        if( ! fail )
+                            err = 0.0f;
+                    }
+                    // retry per section
+                    if( IsDoubleSubnormal( s[j] ) )
+                    {
+                        // First argument is subnormal: try the reference with it replaced by +/-0.
+                        long double correct2 = func.f_ff( 0.0, s2[j] );
+                        long double correct3 = func.f_ff( -0.0, s2[j] );
+                        float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                        float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                        fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                        if( fabsf( err2 ) < fabsf(err ) )
+                            err = err2;
+                        if( fabsf( err3 ) < fabsf(err ) )
+                            err = err3;
+                        // retry per section
+                        if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
+                        {
+                            fail = fail && ( test != 0.0f);
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                        //try with both args as zero
+                        if( IsDoubleSubnormal( s2[j] )  )
+                        {
+                            correct2 = func.f_ff( 0.0, 0.0 );
+                            correct3 = func.f_ff( -0.0, 0.0 );
+                            long double correct4 = func.f_ff( 0.0, -0.0 );
+                            long double correct5 = func.f_ff( -0.0, -0.0 );
+                            err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                            float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
+                                             (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            if( fabsf( err4 ) < fabsf(err ) )
+                                err = err4;
+                            if( fabsf( err5 ) < fabsf(err ) )
+                                err = err5;
+                            // retry per section
+                            if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) ||
+                                IsDoubleResultSubnormal( correct4, ulps ) || IsDoubleResultSubnormal( correct5, ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                    else if(IsDoubleSubnormal(s2[j]) )
+                    {
+                        // Only the second argument is subnormal: try it as +/-0.
+                        long double correct2 = func.f_ff( s[j], 0.0 );
+                        long double correct3 = func.f_ff( s[j], -0.0 );
+                        float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                        float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                        fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                        if( fabsf( err2 ) < fabsf(err ) )
+                            err = err2;
+                        if( fabsf( err3 ) < fabsf(err ) )
+                            err = err3;
+                        // retry per section
+                        if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
+                        {
+                            fail = fail && ( test != 0.0f);
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                    }
+                }
+                // Track this thread's worst error and the inputs that produced it.
+                if( fabsf(err ) > tinfo->maxError )
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                    tinfo->maxErrorValue2 = s2[j];
+                }
+                if( fail )
+                {
+                    vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a}: *%a vs. %a\n", name, sizeNames[k], err, s[j], s2[j], r[j], test );
+                    error = -1;
+                    goto exit;
+                }
+            }
+        }
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+    // Progress output, emitted only when the low 28 bits of base are zero.
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+            // buffer_elements is a size_t, so it is printed with %zu; scale is cast so that
+            // its %zu specifier matches the argument regardless of the field's integer type.
+            // NOTE(review): %u for step and threadCount assumes both are cl_uint -- confirm against TestInfo.
+            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10zu ulps:%5.3f ThreadCount:%2u\n", base, job->step, (size_t) job->scale, buffer_elements,  job->ulps, job->threadCount);
+        } else
+        {
+            vlog("." );
+        }
+        fflush(stdout);
+    }
+    return error;
diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp
deleted file mode 100644
index 4baa499..0000000
--- a/test_conformance/math_brute_force/binary_double.cpp
+++ /dev/null
@@ -1,827 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022);
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global double* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
-        "       d0 = ",
-        name,
-        "( d0, d1 );\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       double3 d1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = ",
-        name,
-        "( d0, d1 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem inBuf2; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    float maxError; // max error value. Init to 0.
-    double
-        maxErrorValue; // position of the max error value (param 1).  Init to 0.
-    double maxErrorValue2; // position of the max error value (param 2).  Init
-                           // to 0.
-    MTdata d;
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-typedef struct TestInfo
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-    int isFDim;
-    int skipNanInf;
-    int isNextafter;
-    bool relaxedMode; // True if test is running in relaxed mode, false
-                      // otherwise.
-} TestInfo;
-// A table of more difficult cases to get right
-static const double specialValues[] = {
-    -NAN,
-    -DBL_MAX,
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22),
-    -1000.0,
-    -100.0,
-    -4.0,
-    -3.5,
-    -3.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51),
-    -2.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51),
-    -2.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52),
-    -1.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    -1.0,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53),
-    -0.5,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54),
-    -0.25,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
-    -DBL_MIN,
-    MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
-    -0.0,
-    +NAN,
-    +DBL_MAX,
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22),
-    +1000.0,
-    +100.0,
-    +4.0,
-    +3.5,
-    +3.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51),
-    +2.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51),
-    +2.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
-    +1.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    +1.0,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53),
-    +0.5,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54),
-    +0.25,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
-    +DBL_MIN,
-    MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
-    +0.0,
-static size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-    double maxErrorVal2 = 0.0;
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_double));
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-    test_info.f = f;
-    test_info.ulps = f->double_ulps;
-    test_info.ftz = f->ftz || gForceFTZ;
-    test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
-    test_info.skipNanInf = 0;
-    test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_double),
-            test_info.subBufferSize * sizeof(cl_double)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-        // Accumulate the arithmetic errors
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
-            }
-        }
-        if (error) goto exit;
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
-    }
-    vlog("\n");
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-        free(test_info.tinfo);
-    }
-    return error;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_double);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    float ulps = job->ulps;
-    dptr func = job->f->dfunc;
-    int ftz = job->ftz;
-    MTdata d = tinfo->d;
-    cl_int error;
-    const char *name = job->f->name;
-    int isNextafter = job->isNextafter;
-    cl_ulong *t;
-    cl_double *r;
-    cl_double *s;
-    cl_double *s2;
-    Force64BitFPUPrecision();
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-    // Init input array
-    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
-    cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
-    cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
-        cl_double *fp = (cl_double *)p;
-        cl_double *fp2 = (cl_double *)p2;
-        uint32_t x, y;
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-        for (; idx < buffer_elements; idx++)
-        {
-            fp[idx] = specialValues[x];
-            fp2[idx] = specialValues[y];
-            if (++x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesCount) break;
-            }
-        }
-    }
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        p[idx] = genrand_int64(d);
-        p2[idx] = genrand_int64(d);
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-    // Calculate the correctly rounded reference result
-    r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
-    s = (cl_double *)gIn + thread_id * buffer_elements;
-    s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (cl_double)func.f_ff(s[j], s2[j]);
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-    // Verify data
-    t = (cl_ulong *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            cl_ulong *q = out[k];
-            // If we aren't getting the correctly rounded result
-            if (t[j] != q[j])
-            {
-                cl_double test = ((cl_double *)q)[j];
-                long double correct = func.f_ff(s[j], s2[j]);
-                float err = Bruteforce_Ulp_Error_Double(test, correct);
-                int fail = !(fabsf(err) <= ulps);
-                if (fail && ftz)
-                {
-                    // retry per section
-                    if (IsDoubleResultSubnormal(correct, ulps))
-                    {
-                        fail = fail && (test != 0.0f);
-                        if (!fail) err = 0.0f;
-                    }
-                    // nextafter on FTZ platforms may return the smallest
-                    // normal float (2^-126) given a denormal or a zero
-                    // as the first argument. The rationale here is that
-                    // nextafter flushes the argument to zero and then
-                    // returns the next representable number in the
-                    // direction of the second argument, and since
-                    // denorms are considered as zero, the smallest
-                    // normal number is the next representable number.
-                    // In which case, it should have the same sign as the
-                    // second argument.
-                    if (isNextafter)
-                    {
-                        if (IsDoubleSubnormal(s[j]) || s[j] == 0.0f)
-                        {
-                            cl_double value = copysign(twoToMinus1022, s2[j]);
-                            fail = fail && (test != value);
-                            if (!fail) err = 0.0f;
-                        }
-                    }
-                    else
-                    {
-                        // retry per section
-                        if (IsDoubleSubnormal(s[j]))
-                        {
-                            long double correct2 = func.f_ff(0.0, s2[j]);
-                            long double correct3 = func.f_ff(-0.0, s2[j]);
-                            float err2 =
-                                Bruteforce_Ulp_Error_Double(test, correct2);
-                            float err3 =
-                                Bruteforce_Ulp_Error_Double(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= ulps))
-                                    && (!(fabsf(err3) <= ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            // retry per section
-                            if (IsDoubleResultSubnormal(correct2, ulps)
-                                || IsDoubleResultSubnormal(correct3, ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                            // try with both args as zero
-                            if (IsDoubleSubnormal(s2[j]))
-                            {
-                                correct2 = func.f_ff(0.0, 0.0);
-                                correct3 = func.f_ff(-0.0, 0.0);
-                                long double correct4 = func.f_ff(0.0, -0.0);
-                                long double correct5 = func.f_ff(-0.0, -0.0);
-                                err2 =
-                                    Bruteforce_Ulp_Error_Double(test, correct2);
-                                err3 =
-                                    Bruteforce_Ulp_Error_Double(test, correct3);
-                                float err4 =
-                                    Bruteforce_Ulp_Error_Double(test, correct4);
-                                float err5 =
-                                    Bruteforce_Ulp_Error_Double(test, correct5);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= ulps))
-                                        && (!(fabsf(err3) <= ulps))
-                                        && (!(fabsf(err4) <= ulps))
-                                        && (!(fabsf(err5) <= ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                if (fabsf(err4) < fabsf(err)) err = err4;
-                                if (fabsf(err5) < fabsf(err)) err = err5;
-                                // retry per section
-                                if (IsDoubleResultSubnormal(correct2, ulps)
-                                    || IsDoubleResultSubnormal(correct3, ulps)
-                                    || IsDoubleResultSubnormal(correct4, ulps)
-                                    || IsDoubleResultSubnormal(correct5, ulps))
-                                {
-                                    fail = fail && (test != 0.0f);
-                                    if (!fail) err = 0.0f;
-                                }
-                            }
-                        }
-                        else if (IsDoubleSubnormal(s2[j]))
-                        {
-                            long double correct2 = func.f_ff(s[j], 0.0);
-                            long double correct3 = func.f_ff(s[j], -0.0);
-                            float err2 =
-                                Bruteforce_Ulp_Error_Double(test, correct2);
-                            float err3 =
-                                Bruteforce_Ulp_Error_Double(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= ulps))
-                                    && (!(fabsf(err3) <= ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            // retry per section
-                            if (IsDoubleResultSubnormal(correct2, ulps)
-                                || IsDoubleResultSubnormal(correct3, ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                }
-                if (fabsf(err) > tinfo->maxError)
-                {
-                    tinfo->maxError = fabsf(err);
-                    tinfo->maxErrorValue = s[j];
-                    tinfo->maxErrorValue2 = s2[j];
-                }
-                if (fail)
-                {
-                    vlog_error("\nERROR: %s%s: %f ulp error at {%.13la, "
-                               "%.13la}: *%.13la vs. %.13la\n",
-                               name, sizeNames[k], err, s[j], s2[j], r[j],
-                               test);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp
deleted file mode 100644
index 32caafa..0000000
--- a/test_conformance/math_brute_force/binary_float.cpp
+++ /dev/null
@@ -1,988 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global float* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem inBuf2; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    float maxError; // max error value. Init to 0.
-    double
-        maxErrorValue; // position of the max error value (param 1).  Init to 0.
-    double maxErrorValue2; // position of the max error value (param 2).  Init
-                           // to 0.
-    MTdata d;
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-typedef struct TestInfo
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-    int isFDim;
-    int skipNanInf;
-    int isNextafter;
-    bool relaxedMode; // True if test is running in relaxed mode, false
-                      // otherwise.
-} TestInfo;
-// A table of more difficult cases to get right
-static const float specialValues[] = {
-    -NAN,
-    -FLT_MAX,
-    MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
-    MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
-    MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
-    MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
-    MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8),
-    MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32),
-    MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7),
-    MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31),
-    MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
-    -1000.f,
-    -100.f,
-    -4.0f,
-    -3.5f,
-    -3.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
-    -2.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
-    -2.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
-    -1.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
-    -1.0f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25),
-    -0.5f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26),
-    -0.25f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
-    -FLT_MIN,
-    MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
-    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
-    MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
-    MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
-    MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
-    -0.0f,
-    +NAN,
-    +FLT_MAX,
-    MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
-    MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
-    MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
-    MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
-    MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8),
-    MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32),
-    MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7),
-    MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31),
-    MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
-    +1000.f,
-    +100.f,
-    +4.0f,
-    +3.5f,
-    +3.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
-    2.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
-    +2.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
-    1.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
-    +1.0f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25),
-    +0.5f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26),
-    +0.25f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
-    +FLT_MIN,
-    MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
-    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
-    MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
-    MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
-    MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
-    +0.0f,
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-    double maxErrorVal2 = 0.0;
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_float));
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-    test_info.f = f;
-    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
-    test_info.ftz =
-        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    test_info.relaxedMode = relaxedMode;
-    test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
-    test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
-    test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_float),
-            test_info.subBufferSize * sizeof(cl_float)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-        // Accumulate the arithmetic errors
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
-            }
-        }
-        if (error) goto exit;
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
-    }
-    vlog("\n");
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-        free(test_info.tinfo);
-    }
-    return error;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_float);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    fptr func = job->f->func;
-    int ftz = job->ftz;
-    bool relaxedMode = job->relaxedMode;
-    float ulps = getAllowedUlpError(job->f, relaxedMode);
-    MTdata d = tinfo->d;
-    cl_int error;
-    cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
-    const char *name = job->f->name;
-    int isFDim = job->isFDim;
-    int skipNanInf = job->skipNanInf;
-    int isNextafter = job->isNextafter;
-    cl_uint *t = 0;
-    cl_float *r = 0;
-    cl_float *s = 0;
-    cl_float *s2 = 0;
-    cl_int copysign_test = 0;
-    RoundingMode oldRoundMode;
-    int skipVerification = 0;
-    if (relaxedMode)
-    {
-        func = job->f->rfunc;
-        if (strcmp(name, "pow") == 0 && gFastRelaxedDerived)
-        {
-            ulps = INFINITY;
-            skipVerification = 1;
-        }
-    }
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_uint *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-    // Init input array
-    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
-    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
-    cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
-        float *fp = (float *)p;
-        float *fp2 = (float *)p2;
-        uint32_t x, y;
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-        for (; idx < buffer_elements; idx++)
-        {
-            fp[idx] = specialValues[x];
-            fp2[idx] = specialValues[y];
-            ++x;
-            if (x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesCount) break;
-            }
-        }
-    }
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        p[idx] = genrand_int32(d);
-        p2[idx] = genrand_int32(d);
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-    if (gSkipCorrectnessTesting)
-    {
-        if ((error = clFinish(tinfo->tQueue)))
-        {
-            vlog_error("Error: clFinish failed! err: %d\n", error);
-            goto exit;
-        }
-        free(overflow);
-        return CL_SUCCESS;
-    }
-    FPU_mode_type oldMode;
-    oldRoundMode = kRoundToNearestEven;
-    if (isFDim)
-    {
-        // Calculate the correctly rounded reference result
-        memset(&oldMode, 0, sizeof(oldMode));
-        if (ftz) ForceFTZ(&oldMode);
-        // Set the rounding mode to match the device
-        if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
-    }
-    if (!strcmp(name, "copysign")) copysign_test = 1;
-#define ref_func(s, s2) (copysign_test ? func.f_ff_f(s, s2) : func.f_ff(s, s2))
-    // Calculate the correctly rounded reference result
-    r = (float *)gOut_Ref + thread_id * buffer_elements;
-    s = (float *)gIn + thread_id * buffer_elements;
-    s2 = (float *)gIn2 + thread_id * buffer_elements;
-    if (skipNanInf)
-    {
-        for (size_t j = 0; j < buffer_elements; j++)
-        {
-            feclearexcept(FE_OVERFLOW);
-            r[j] = (float)ref_func(s[j], s2[j]);
-            overflow[j] =
-                FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
-        }
-    }
-    else
-    {
-        for (size_t j = 0; j < buffer_elements; j++)
-            r[j] = (float)ref_func(s[j], s2[j]);
-    }
-    if (isFDim && ftz) RestoreFPState(&oldMode);
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-    if (!skipVerification)
-    {
-        // Verify data
-        t = (cl_uint *)r;
-        for (size_t j = 0; j < buffer_elements; j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                cl_uint *q = out[k];
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j])
-                {
-                    float test = ((float *)q)[j];
-                    double correct = ref_func(s[j], s2[j]);
-                    // Per section 10 paragraph 6, accept any result if an input
-                    // or output is a infinity or NaN or overflow As per
-                    // OpenCL 2.0 spec, section, enabling
-                    // fast-relaxed-math mode also enables -cl-finite-math-only
-                    // optimization. This optimization allows to assume that
-                    // arguments and results are not NaNs or +/-INFs. Hence,
-                    // accept any result if inputs or results are NaNs or INFs.
-                    if (relaxedMode || skipNanInf)
-                    {
-                        if (skipNanInf && overflow[j]) continue;
-                        // Note: no double rounding here.  Reference functions
-                        // calculate in single precision.
-                        if (IsFloatInfinity(correct) || IsFloatNaN(correct)
-                            || IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j])
-                            || IsFloatInfinity(s[j]) || IsFloatNaN(s[j]))
-                            continue;
-                    }
-                    float err = Ulp_Error(test, correct);
-                    int fail = !(fabsf(err) <= ulps);
-                    if (fail && ftz)
-                    {
-                        // retry per section
-                        if (IsFloatResultSubnormal(correct, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-                        // nextafter on FTZ platforms may return the smallest
-                        // normal float (2^-126) given a denormal or a zero
-                        // as the first argument. The rationale here is that
-                        // nextafter flushes the argument to zero and then
-                        // returns the next representable number in the
-                        // direction of the second argument, and since
-                        // denorms are considered as zero, the smallest
-                        // normal number is the next representable number.
-                        // In which case, it should have the same sign as the
-                        // second argument.
-                        if (isNextafter)
-                        {
-                            if (IsFloatSubnormal(s[j]) || s[j] == 0.0f)
-                            {
-                                float value = copysignf(twoToMinus126, s2[j]);
-                                fail = fail && (test != value);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                        else
-                        {
-                            // retry per section
-                            if (IsFloatSubnormal(s[j]))
-                            {
-                                double correct2, correct3;
-                                float err2, err3;
-                                if (skipNanInf) feclearexcept(FE_OVERFLOW);
-                                correct2 = ref_func(0.0, s2[j]);
-                                correct3 = ref_func(-0.0, s2[j]);
-                                // Per section 10 paragraph 6, accept any result
-                                // if an input or output is a infinity or NaN or
-                                // overflow As per OpenCL 2.0 spec,
-                                // section, enabling fast-relaxed-math
-                                // mode also enables -cl-finite-math-only
-                                // optimization. This optimization allows to
-                                // assume that arguments and results are not
-                                // NaNs or +/-INFs. Hence, accept any result if
-                                // inputs or results are NaNs or INFs.
-                                if (relaxedMode || skipNanInf)
-                                {
-                                    if (fetestexcept(FE_OVERFLOW) && skipNanInf)
-                                        continue;
-                                    // Note: no double rounding here.  Reference
-                                    // functions calculate in single precision.
-                                    if (IsFloatInfinity(correct2)
-                                        || IsFloatNaN(correct2)
-                                        || IsFloatInfinity(correct3)
-                                        || IsFloatNaN(correct3))
-                                        continue;
-                                }
-                                err2 = Ulp_Error(test, correct2);
-                                err3 = Ulp_Error(test, correct3);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= ulps))
-                                        && (!(fabsf(err3) <= ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                // retry per section
-                                if (IsFloatResultSubnormal(correct2, ulps)
-                                    || IsFloatResultSubnormal(correct3, ulps))
-                                {
-                                    fail = fail && (test != 0.0f);
-                                    if (!fail) err = 0.0f;
-                                }
-                                // try with both args as zero
-                                if (IsFloatSubnormal(s2[j]))
-                                {
-                                    double correct4, correct5;
-                                    float err4, err5;
-                                    if (skipNanInf) feclearexcept(FE_OVERFLOW);
-                                    correct2 = ref_func(0.0, 0.0);
-                                    correct3 = ref_func(-0.0, 0.0);
-                                    correct4 = ref_func(0.0, -0.0);
-                                    correct5 = ref_func(-0.0, -0.0);
-                                    // Per section 10 paragraph 6, accept any
-                                    // result if an input or output is a
-                                    // infinity or NaN or overflow As per
-                                    // OpenCL 2.0 spec, section,
-                                    // enabling fast-relaxed-math mode also
-                                    // enables -cl-finite-math-only
-                                    // optimization. This optimization allows to
-                                    // assume that arguments and results are not
-                                    // NaNs or +/-INFs. Hence, accept any result
-                                    // if inputs or results are NaNs or INFs.
-                                    if (relaxedMode || skipNanInf)
-                                    {
-                                        if (fetestexcept(FE_OVERFLOW)
-                                            && skipNanInf)
-                                            continue;
-                                        // Note: no double rounding here.
-                                        // Reference functions calculate in
-                                        // single precision.
-                                        if (IsFloatInfinity(correct2)
-                                            || IsFloatNaN(correct2)
-                                            || IsFloatInfinity(correct3)
-                                            || IsFloatNaN(correct3)
-                                            || IsFloatInfinity(correct4)
-                                            || IsFloatNaN(correct4)
-                                            || IsFloatInfinity(correct5)
-                                            || IsFloatNaN(correct5))
-                                            continue;
-                                    }
-                                    err2 = Ulp_Error(test, correct2);
-                                    err3 = Ulp_Error(test, correct3);
-                                    err4 = Ulp_Error(test, correct4);
-                                    err5 = Ulp_Error(test, correct5);
-                                    fail = fail
-                                        && ((!(fabsf(err2) <= ulps))
-                                            && (!(fabsf(err3) <= ulps))
-                                            && (!(fabsf(err4) <= ulps))
-                                            && (!(fabsf(err5) <= ulps)));
-                                    if (fabsf(err2) < fabsf(err)) err = err2;
-                                    if (fabsf(err3) < fabsf(err)) err = err3;
-                                    if (fabsf(err4) < fabsf(err)) err = err4;
-                                    if (fabsf(err5) < fabsf(err)) err = err5;
-                                    // retry per section
-                                    if (IsFloatResultSubnormal(correct2, ulps)
-                                        || IsFloatResultSubnormal(correct3,
-                                                                  ulps)
-                                        || IsFloatResultSubnormal(correct4,
-                                                                  ulps)
-                                        || IsFloatResultSubnormal(correct5,
-                                                                  ulps))
-                                    {
-                                        fail = fail && (test != 0.0f);
-                                        if (!fail) err = 0.0f;
-                                    }
-                                }
-                            }
-                            else if (IsFloatSubnormal(s2[j]))
-                            {
-                                double correct2, correct3;
-                                float err2, err3;
-                                if (skipNanInf) feclearexcept(FE_OVERFLOW);
-                                correct2 = ref_func(s[j], 0.0);
-                                correct3 = ref_func(s[j], -0.0);
-                                // Per section 10 paragraph 6, accept any result
-                                // if an input or output is a infinity or NaN or
-                                // overflow As per OpenCL 2.0 spec,
-                                // section, enabling fast-relaxed-math
-                                // mode also enables -cl-finite-math-only
-                                // optimization. This optimization allows to
-                                // assume that arguments and results are not
-                                // NaNs or +/-INFs. Hence, accept any result if
-                                // inputs or results are NaNs or INFs.
-                                if (relaxedMode || skipNanInf)
-                                {
-                                    // Note: no double rounding here.  Reference
-                                    // functions calculate in single precision.
-                                    if (overflow[j] && skipNanInf) continue;
-                                    if (IsFloatInfinity(correct2)
-                                        || IsFloatNaN(correct2)
-                                        || IsFloatInfinity(correct3)
-                                        || IsFloatNaN(correct3))
-                                        continue;
-                                }
-                                err2 = Ulp_Error(test, correct2);
-                                err3 = Ulp_Error(test, correct3);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= ulps))
-                                        && (!(fabsf(err3) <= ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                // retry per section
-                                if (IsFloatResultSubnormal(correct2, ulps)
-                                    || IsFloatResultSubnormal(correct3, ulps))
-                                {
-                                    fail = fail && (test != 0.0f);
-                                    if (!fail) err = 0.0f;
-                                }
-                            }
-                        }
-                    }
-                    if (fabsf(err) > tinfo->maxError)
-                    {
-                        tinfo->maxError = fabsf(err);
-                        tinfo->maxErrorValue = s[j];
-                        tinfo->maxErrorValue2 = s2[j];
-                    }
-                    if (fail)
-                    {
-                        vlog_error(
-                            "\nERROR: %s%s: %f ulp error at {%a (0x%x), %a "
-                            "(0x%x)}: *%a vs. %a (0x%8.8x) at index: %d\n",
-                            name, sizeNames[k], err, s[j], ((cl_uint *)s)[j],
-                            s2[j], ((cl_uint *)s2)[j], r[j], test,
-                            ((cl_uint *)&test)[0], j);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-    }
-    if (isFDim && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-    if (overflow) free(overflow);
-    return error;
diff --git a/test_conformance/math_brute_force/binary_i.cpp b/test_conformance/math_brute_force/binary_i.cpp
new file mode 100644
index 0000000..a29a876
--- /dev/null
+++ b/test_conformance/math_brute_force/binary_i.cpp
@@ -0,0 +1,1233 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "Utility.h"
+#include <string.h>
+#include <limits.h>
+#include "FunctionList.h"
+// Test drivers for functions taking (float, int) and (double, int) operands.
+int TestFunc_Float_Float_Int(const Func *f, MTdata);
+int TestFunc_Double_Double_Int(const Func *f, MTdata);
+// Table entry for this test family: its name followed by the float and
+// double drivers declared above.
+extern const vtbl _binary_i = { "binary_i", TestFunc_Float_Float_Int,
+                                TestFunc_Double_Double_Int };
+// Forward declarations of the kernel builders for the float and double variants.
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+// Assemble the OpenCL C source of the float test kernel for `name` and hand
+// it to MakeKernels.
+//
+//   name          function called inside the generated kernel
+//   vectorSize    index into sizeNames[] / sizeValues[] selecting the vector width
+//   kernel_count  forwarded to MakeKernels
+//   k             forwarded to MakeKernels (kernel output)
+//   p             forwarded to MakeKernels (program output)
+//
+// Vector width 3 uses a dedicated source (c3) that goes through
+// vload3/vstore3 and handles the trailing partial vector element by element.
+// Returns the result of MakeKernels.
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+{
+    const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global int", sizeNames[vectorSize], "* in2 )\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in1[i], in2[i] );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global int* in2)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       float3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       int3 i0 = vload3( 0, in2 + 3 * i );\n"
+                            "       f0 = ", name, "( f0, i0 );\n"
+                            "       vstore3( f0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       float3 f0;\n"
+                            "       int3 i0;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+                            "               i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       f0 = ", name, "( f0, i0 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = f0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = f0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    // Default to the generic source; switch to the vload3/vstore3 variant for 3-wide vectors.
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    // snprintf always NUL-terminates within the given size, so pass the full buffer size.
+    char testName[32];
+    snprintf( testName, sizeof( testName ), "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+}
+// Assemble the OpenCL C source of the double test kernel for `name` and hand
+// it to MakeKernels. The source starts by enabling cl_khr_fp64.
+//
+//   name          function called inside the generated kernel
+//   vectorSize    index into sizeNames[] / sizeValues[] selecting the vector width
+//   kernel_count  forwarded to MakeKernels
+//   k             forwarded to MakeKernels (kernel output)
+//   p             forwarded to MakeKernels (program output)
+//
+// Vector width 3 uses a dedicated source (c3) that goes through
+// vload3/vstore3 and handles the trailing partial vector element by element.
+// Returns the result of MakeKernels.
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+{
+    const char *c[] = {     "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global int", sizeNames[vectorSize], "* in2 )\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in1[i], in2[i] );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global int* in2)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       double3 d0 = vload3( 0, in + 3 * i );\n"
+                            "       int3 i0 = vload3( 0, in2 + 3 * i );\n"
+                            "       d0 = ", name, "( d0, i0 );\n"
+                            "       vstore3( d0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       double3 d0;\n"
+                            "       int3 i0;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
+                            "               i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       d0 = ", name, "( d0, i0 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = d0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = d0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    // Default to the generic source; switch to the vload3/vstore3 variant for 3-wide vectors.
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    // snprintf always NUL-terminates within the given size, so pass the full buffer size.
+    char testName[32];
+    snprintf( testName, sizeof( testName ), "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+}
+// Arguments handed to the BuildKernel_FloatFn / BuildKernel_DoubleFn jobs.
+typedef struct BuildKernelInfo
+{
+    cl_uint     offset;            // the first vector size to build
+    cl_uint     kernel_count;      // passed through to BuildKernel / BuildKernelDouble
+    cl_kernel   **kernels;         // kernels[vector_size] is the kernel array filled for that size
+    cl_program  *programs;         // programs[vector_size] receives the program for that size
+    const char  *nameInCode;       // function name spliced into the generated kernel source
+} BuildKernelInfo;
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+// Job callback passed to ThreadPool_Do: builds the float kernels for one
+// vector size. `p` is a BuildKernelInfo; the vector-size index is
+// info->offset + job_id. Returns the result of BuildKernel.
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i );
+}
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+// Double-precision counterpart of BuildKernel_FloatFn: builds the double
+// kernels for one vector size. `p` is a BuildKernelInfo; the vector-size
+// index is info->offset + job_id. Returns the result of BuildKernelDouble.
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+{
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i );
+}
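+// A table of float inputs that are harder to get right: NaN, infinities, FLT_MAX, values adjacent to powers of two and to half-integers, subnormals, and signed zeros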
+static const float specialValuesFloat[] = {
+    -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),  MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f,  -4.0f, -3.5f,
+    -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),  MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
+    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f,
+    +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
+    +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
+    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
+static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] );
+// Integer second operands for the edge-case pass: TestFloat pairs each entry
+// with every entry of specialValuesFloat.
+static const int specialValuesInt[] = { 0, 1, 2, 3, 126, 127, 128, 0x02000001, 0x04000001, 1465264071, 1488522147,
+                                            -1, -2, -3, -126, -127, -128, -0x02000001, -0x04000001, -1465264071, -1488522147 };
+// Number of entries in specialValuesInt.
+static size_t specialValuesIntCount = sizeof( specialValuesInt ) / sizeof( specialValuesInt[0] );
+// Thread specific data for a worker thread
+typedef struct ThreadInfo
+{
+    cl_mem      inBuf;                              // first input sub-buffer for the thread (float/double operand)
+    cl_mem      inBuf2;                             // second input sub-buffer for the thread (int operand)
+    cl_mem      outBuf[ VECTOR_SIZE_COUNT ];        // output buffers for the thread
+    float       maxError;                           // max error value. Init to 0.
+    double      maxErrorValue;                      // position of the max error value (param 1).  Init to 0.
+    cl_int      maxErrorValue2;                     // position of the max error value (param 2).  Init to 0.
+    MTdata      d;                                  // per thread random number generator state
+    cl_command_queue tQueue;                        // per thread command queue to improve performance
+} ThreadInfo;
+// Job description shared (read-only) by all worker threads of one test run.
+typedef struct TestInfo
+{
+    size_t      subBufferSize;                      // Size of the sub-buffer in elements
+    const Func  *f;                                 // A pointer to the function info
+    cl_program  programs[ VECTOR_SIZE_COUNT ];      // programs for various vector sizes
+    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
+    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
+    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
+    cl_uint     step;                               // step between each chunk and the next.
+    cl_uint     scale;                              // stride between individual test values
+    float       ulps;                               // max_allowed ulps
+    int         ftz;                                // non-zero if running in flush to zero mode
+    // no special values
+} TestInfo;
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );
+// Test driver for a function taking (float, int) and returning float.
+//
+// Sets up one TestInfo for the run: per-thread kernels, per-thread
+// sub-buffers of gInBuffer / gInBuffer2 / gOutBuffer[], a command queue and
+// a random generator per thread. It then builds the kernels and runs
+// TestFloat across test_info.jobCount jobs via ThreadPool_Do. Optionally
+// (gMeasureTimes) it also times the kernels on the whole buffers.
+//
+//   f   description of the function under test
+//   d   random generator used to seed the per-thread generators and the timing inputs
+//
+// Returns 0 on success or the first error encountered. Every failure after
+// setup has begun goes through the common cleanup at `exit`, so kernels,
+// programs, sub-buffers and queues are always released.
+int TestFunc_Float_Float_Int(const Func *f, MTdata d)
+{
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+    cl_int      maxErrorVal2 = 0;
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode)
+    {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+    test_info.f = f;
+    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
+    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zu, %zu}\n", region.origin, region.size );
+            goto exit;
+        }
+        cl_buffer_region region2 = { i * test_info.subBufferSize * sizeof( cl_int), test_info.subBufferSize * sizeof( cl_int) };
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region2, &error);
+        // Check the buffer that was just created (inBuf2) and report its own region.
+        if( error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zu, %zu}\n", region2.origin, region2.size );
+            goto exit;
+        }
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zu, %zu}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+    // Run the kernels
+    error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
+    // Accumulate the arithmetic errors
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        if( test_info.tinfo[i].maxError > maxError )
+        {
+            maxError = test_info.tinfo[i].maxError;
+            maxErrorVal = test_info.tinfo[i].maxErrorValue;
+            maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+        }
+    }
+    if( error )
+        goto exit;
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+        {
+            p[j] = (genrand_int32(d) & ~0x40000000) | 0x38000000;
+            p2[j] = 3;
+        }
+        // Failures here must still release the per-thread resources, so go
+        // through the common cleanup instead of returning directly.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2 );
+    vlog( "\n" );
+exit:
+    // Common cleanup: test_info was zeroed up front, so entries that were
+    // never created are still NULL here.
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata(test_info.tinfo[i].d);
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+        free( test_info.tinfo );
+    }
+    return error;
+}
+// Worker job run through ThreadPool_Do: tests one chunk of inputs for every
+// enabled vector size and checks the device results against the host
+// reference function.
+//
+//   job_id     index of the chunk; the first jobs carry the special-value pairs
+//   thread_id  selects the ThreadInfo (buffers, queue, RNG) and kernel copy to use
+//   data       the shared TestInfo for this run
+//
+// Returns CL_SUCCESS, an OpenCL error code, or -1 when a result exceeds the
+// allowed ulp error. Updates tinfo->maxError / maxErrorValue / maxErrorValue2.
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data  )
+{
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_float );
+    cl_uint     base = job_id * (cl_uint) job->step;
+    ThreadInfo  *tinfo = job->tinfo + thread_id;
+    float       ulps = job->ulps;
+    fptr        func = job->f->func;
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    const char  *name = job->f->name;
+    cl_uint     *t;
+    cl_float    *r,*s;
+    cl_int      *s2;
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_uint  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %u failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+    //Init input array
+    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
+    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    int totalSpecialValueCount = specialValuesFloatCount * specialValuesIntCount;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    if( job_id <= (cl_uint)indx )
+    { // test edge cases
+        float *fp = (float *)p;
+        cl_int *ip2 = (cl_int *)p2;
+        uint32_t x, y;
+        // Resume the (float, int) cross product where the previous job left off.
+        x = (job_id * buffer_elements) % specialValuesFloatCount;
+        y = (job_id * buffer_elements) / specialValuesFloatCount;
+        for( ; j < buffer_elements; j++ )
+        {
+            fp[j] = specialValuesFloat[x];
+            ip2[j] = specialValuesInt[y];
+            if( ++x >= specialValuesFloatCount )
+            {
+                x = 0;
+                y++;
+                if( y >= specialValuesIntCount )
+                    break;
+            }
+        }
+    }
+    //Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        p[j] = genrand_int32(d);
+        p2[j] = genrand_int32(d);
+    }
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject failed! err: %d\n", error );
+            goto exit;
+        }
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+    //Calculate the correctly rounded reference result
+    r = (float *)gOut_Ref  + thread_id * buffer_elements;
+    s = (float *)gIn  + thread_id * buffer_elements;
+    s2 = (cl_int *)gIn2  + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = (float) func.f_fi( s[j], s2[j] );
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %u failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+    // Wait for the last buffer
+    out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %u failed! err: %d\n", j, error );
+        goto exit;
+    }
+    //Verify data
+    t = (cl_uint *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+        {
+            cl_uint *q = out[k];
+            // If we aren't getting the correctly rounded result
+            if( t[j] != q[j] )
+            {
+                float test = ((float*) q)[j];
+                double correct = func.f_fi( s[j], s2[j] );
+                float err = Ulp_Error( test, correct );
+                int fail = ! (fabsf(err) <= ulps);
+                if( fail && ftz )
+                {
+                    // FTZ retry: a subnormal reference result may come back as zero
+                    if( IsFloatResultSubnormal(correct, ulps ) )
+                    {
+                        fail = fail && ( test != 0.0f );
+                        if( ! fail )
+                            err = 0.0f;
+                    }
+                    // FTZ retry: a subnormal input may have been treated as +0 or -0
+                    if( IsFloatSubnormal( s[j] ) )
+                    {
+                        double correct2, correct3;
+                        float err2, err3;
+                        correct2 = func.f_fi( 0.0, s2[j] );
+                        correct3 = func.f_fi( -0.0, s2[j] );
+                        err2 = Ulp_Error( test, correct2  );
+                        err3 = Ulp_Error( test, correct3  );
+                        fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                        if( fabsf( err2 ) < fabsf(err ) )
+                            err = err2;
+                        if( fabsf( err3 ) < fabsf(err ) )
+                            err = err3;
+                        // FTZ retry: the flushed-input reference may itself be subnormal
+                        if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
+                        {
+                            fail = fail && ( test != 0.0f);
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                    }
+                }
+                if( fabsf(err ) > tinfo->maxError )
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                    tinfo->maxErrorValue2 = s2[j];
+                }
+                if( fail )
+                {
+                    vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %d}: *%a vs. %a (0x%8.8x) at index: %u\n", name, sizeNames[k], err, s[j], s2[j], r[j], test, ((cl_uint*)&test)[0], j );
+                    error = -1;
+                    goto exit;
+                }
+            }
+        }
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %u failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+            // job->scale is a cl_uint (%u); buffer_elements is a size_t (%zu).
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f ThreadCount:%2u\n", base, job->step,  job->scale, buffer_elements, job->ulps, job->threadCount);
+        } else
+        {
+            vlog("." );
+        }
+        fflush(stdout);
+    }
+exit:
+    return error;
+}
+// A table of more difficult cases to get right.
+// It holds NaN, infinity, DBL_MAX, values just above/at/just below powers of
+// two and half-integers, the values around DBL_MIN, a spread of subnormals and
+// signed zero.  The second half repeats the first half with the sign flipped,
+// so every entry must appear once with each sign.
+static const double specialValuesDouble[] = {
+    -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100.,  -4.0, -3.5,
+    -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
+    +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100.,  +4.0, +3.5,
+    +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
+// Number of entries in specialValuesDouble.
+static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] );
+// Edge-case values for the integer operand paired with specialValuesDouble.
+// NOTE(review): -11024 breaks the mirror of +1024 and looks like a typo for
+// -1024; left unchanged here -- confirm before altering the test vectors.
+static const int specialValuesInt2[] = { 0, 1, 2, 3, 1022, 1023, 1024, INT_MIN, INT_MAX,
+                                            -1, -2, -3, -1022, -1023, -11024, -INT_MAX };
+// Element count of specialValuesInt2.  It must be derived from this array
+// itself: TestDouble indexes specialValuesInt2 with values bounded by it.
+static size_t specialValuesInt2Count = sizeof( specialValuesInt2 ) / sizeof( specialValuesInt2[0] );
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p );
+// Brute-force test driver for a double-precision function taking a double and
+// an int operand.
+//
+//   f  table entry for the function under test; name, nameInCode, double_ulps
+//      and ftz are read from it.
+//   d  random number generator state; seeds one generator per worker thread
+//      and fills the inputs of the optional timing run.
+//
+// Sets up per-thread kernels, sub-buffers and command queues, runs TestDouble
+// over test_info.jobCount jobs, optionally times the kernels, then releases
+// everything it created.  Returns the value of `error`: zero when every step
+// succeeded, otherwise the error code of the step that failed.
+int TestFunc_Double_Double_Int(const Func *f, MTdata d)
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+    cl_int      maxErrorVal2 = 0;
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode)
+    {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+    test_info.f = f;
+    test_info.ulps = f->double_ulps;
+    test_info.ftz = f->ftz || gForceFTZ;
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    // Give every worker thread its own slice of the input/output buffers and its own queue
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        // The int operand uses cl_int-sized elements, hence a separate region
+        cl_buffer_region region2 = { i * test_info.subBufferSize * sizeof( cl_int), test_info.subBufferSize * sizeof( cl_int) };
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region2, &error);
+        // Check the handle that was just created (inBuf2), and report its own region
+        if( error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zd, %zd}\n", region2.origin, region2.size );
+            goto exit;
+        }
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            /* Qualcomm fix: end */
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+    // Run the kernels
+    if( !gSkipCorrectnessTesting )
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
+    // Accumulate the arithmetic errors
+    // (done before the error check so the worst case seen so far is kept)
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        if( test_info.tinfo[i].maxError > maxError )
+        {
+            maxError = test_info.tinfo[i].maxError;
+            maxErrorVal = test_info.tinfo[i].maxErrorValue;
+            maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+        }
+    }
+    if( error )
+        goto exit;
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        double *p = (double *)gIn;
+        cl_int *p2 = (cl_int *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( cl_double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = 3;
+        }
+        // On failure go through the common release path instead of returning
+        // directly, so kernels, programs, sub-buffers and queues are not leaked.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE/2, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        // j already equals gMaxVectorSizeIndex here, so this loop prints nothing
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2 );
+    vlog( "\n" );
+    // Release
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata(test_info.tinfo[i].d);
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+        free( test_info.tinfo );
+    }
+    return error;
+// Worker job run through ThreadPool_Do: tests one chunk of inputs for a
+// double-precision function taking a double and an int operand.
+//
+//   job_id     index of the chunk; the first jobs are filled from the
+//              special-value tables, the rest of each buffer with random data.
+//   thread_id  selects this worker's ThreadInfo, its kernels and its slice of
+//              the host buffers gIn, gIn2 and gOut_Ref.
+//   data       the shared TestInfo.
+//
+// Returns the value of `error`: zero when all vector sizes produced results
+// within the allowed ulps, -1 on a mismatch, otherwise the code of the failing
+// call.  The largest error seen is recorded in the worker's ThreadInfo.
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_double );
+    cl_uint     base = job_id * (cl_uint) job->step;
+    ThreadInfo  *tinfo = job->tinfo + thread_id;
+    float       ulps = job->ulps;
+    dptr        func = job->f->dfunc;
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    const char  *name = job->f->name;
+    cl_ulong    *t;
+    cl_double   *r,*s;
+    cl_int      *s2;
+    Force64BitFPUPrecision();
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_ulong  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+    //Init input array
+    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
+    cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    int totalSpecialValueCount = specialValuesDoubleCount * specialValuesInt2Count;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    if( job_id <= (cl_uint)indx )
+    { // test edge cases
+        cl_double *fp = (cl_double *)p;
+        cl_int *ip2 = (cl_int *)p2;
+        uint32_t x, y;
+    x = (job_id * buffer_elements) % specialValuesDoubleCount;
+    y = (job_id * buffer_elements) / specialValuesDoubleCount;
+        for( ; j < buffer_elements; j++ )
+        {
+            fp[j] = specialValuesDouble[x];
+            ip2[j] = specialValuesInt2[y];
+            if( ++x >= specialValuesDoubleCount )
+            {
+                x = 0;
+                y++;
+                if( y >= specialValuesInt2Count )
+                {
+                    // Step past the pair just stored; otherwise the random
+                    // fill below would overwrite the last special-value pair.
+                    j++;
+                    break;
+                }
+            }
+        }
+    }
+    //Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        p[j] = DoubleFromUInt32(genrand_int32(d));
+        p2[j] = genrand_int32(d);
+    }
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+    // The int operand buffer holds the same element count at half the element size
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size/2, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject failed! err: %d\n", error );
+            goto exit;
+        }
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+    //Calculate the correctly rounded reference result
+    r = (cl_double *)gOut_Ref  + thread_id * buffer_elements;
+    s = (cl_double *)gIn  + thread_id * buffer_elements;
+    s2 = (cl_int *)gIn2  + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = (cl_double) func.f_fi( s[j], s2[j] );
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+    // Wait for the last buffer
+    out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        goto exit;
+    }
+    //Verify data
+    // Results are first compared bit-for-bit; the ulp check runs only on a mismatch
+    t = (cl_ulong *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+        {
+            cl_ulong *q = out[k];
+            // If we aren't getting the correctly rounded result
+            if( t[j] != q[j] )
+            {
+                cl_double test = ((cl_double*) q)[j];
+                long double correct = func.f_fi( s[j], s2[j] );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
+                int fail = ! (fabsf(err) <= ulps);
+                if( fail && ftz )
+                {
+                    // retry: in flush-to-zero mode a subnormal reference result may be returned as zero
+                    if( IsDoubleResultSubnormal(correct, ulps ) )
+                    {
+                        fail = fail && ( test != 0.0f );
+                        if( ! fail )
+                            err = 0.0f;
+                    }
+                    // retry: a subnormal input may have been treated as +0 or -0
+                    if( IsDoubleSubnormal( s[j] ) )
+                    {
+                        long double correct2 = func.f_fi( 0.0, s2[j] );
+                        long double correct3 = func.f_fi( -0.0, s2[j] );
+                        float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                        float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                        fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                        if( fabsf( err2 ) < fabsf(err ) )
+                            err = err2;
+                        if( fabsf( err3 ) < fabsf(err ) )
+                            err = err3;
+                        // retry: the result for the flushed input may itself be subnormal
+                        if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
+                        {
+                            fail = fail && ( test != 0.0f);
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                    }
+                }
+                if( fabsf(err ) > tinfo->maxError )
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                    tinfo->maxErrorValue2 = s2[j];
+                }
+                if( fail )
+                {
+                    vlog_error( "\nERROR: %s%s: %f ulp error at {%.13la, %d}: *%.13la vs. %.13la\n", name, sizeNames[k], err, s[j], s2[j], r[j], test );
+                    error = -1;
+                    goto exit;
+                }
+            }
+        }
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+    // Progress output, emitted only when the low 28 bits of base are zero
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+           vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount);
+        } else
+        {
+          vlog("." );
+       }
+       fflush(stdout);
+    }
+    return error;
diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp
deleted file mode 100644
index 69e620a..0000000
--- a/test_conformance/math_brute_force/binary_i_double.cpp
+++ /dev/null
@@ -1,746 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <climits>
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global int",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global int* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       int3 i0 = vload3( 0, in2 + 3 * i );\n"
-        "       d0 = ",
-        name,
-        "( d0, i0 );\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       int3 i0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = ",
-        name,
-        "( d0, i0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem inBuf2; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    float maxError; // max error value. Init to 0.
-    double
-        maxErrorValue; // position of the max error value (param 1).  Init to 0.
-    cl_int maxErrorValue2; // position of the max error value (param 2).  Init
-                           // to 0.
-    MTdata d;
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-typedef struct TestInfo
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-    // no special values
-} TestInfo;
-// A table of more difficult cases to get right
-static const double specialValues[] = {
-    -NAN,
-    -DBL_MAX,
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22),
-    -1000.0,
-    -100.0,
-    -4.0,
-    -3.5,
-    -3.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51),
-    -2.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51),
-    -2.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52),
-    -1.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    -1.0,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53),
-    -0.5,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54),
-    -0.25,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
-    -DBL_MIN,
-    MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
-    -0.0,
-    +NAN,
-    +DBL_MAX,
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22),
-    +1000.0,
-    +100.0,
-    +4.0,
-    +3.5,
-    +3.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51),
-    +2.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51),
-    +2.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
-    +1.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    +1.0,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53),
-    +0.5,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54),
-    +0.25,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
-    +DBL_MIN,
-    MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
-    +0.0,
-static size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-static const int specialValuesInt[] = {
-    0,       1,  2,  3,  1022,  1023,  1024,   INT_MIN,
-    INT_MAX, -1, -2, -3, -1022, -1023, -11024, -INT_MAX,
-static constexpr size_t specialValuesIntCount =
-    sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-    cl_int maxErrorVal2 = 0;
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_double));
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-    test_info.f = f;
-    test_info.ulps = f->double_ulps;
-    test_info.ftz = f->ftz || gForceFTZ;
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_double),
-            test_info.subBufferSize * sizeof(cl_double)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        cl_buffer_region region2 = { i * test_info.subBufferSize
-                                         * sizeof(cl_int),
-                                     test_info.subBufferSize * sizeof(cl_int) };
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region2, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-        // Accumulate the arithmetic errors
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
-            }
-        }
-        if (error) goto exit;
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
-    }
-    vlog("\n");
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-        free(test_info.tinfo);
-    }
-    return error;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_double);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    float ulps = job->ulps;
-    dptr func = job->f->dfunc;
-    int ftz = job->ftz;
-    MTdata d = tinfo->d;
-    cl_int error;
-    const char *name = job->f->name;
-    cl_ulong *t;
-    cl_double *r;
-    cl_double *s;
-    cl_int *s2;
-    Force64BitFPUPrecision();
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-    // Init input array
-    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
-    cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
-    size_t idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
-        cl_double *fp = (cl_double *)p;
-        cl_int *ip2 = (cl_int *)p2;
-        uint32_t x, y;
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-        for (; idx < buffer_elements; idx++)
-        {
-            fp[idx] = specialValues[x];
-            ip2[idx] = specialValuesInt[y];
-            if (++x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesIntCount) break;
-            }
-        }
-    }
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        p[idx] = DoubleFromUInt32(genrand_int32(d));
-        p2[idx] = genrand_int32(d);
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size / 2, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-    // Calculate the correctly rounded reference result
-    r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
-    s = (cl_double *)gIn + thread_id * buffer_elements;
-    s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (cl_double)func.f_fi(s[j], s2[j]);
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-    // Verify data
-    t = (cl_ulong *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            cl_ulong *q = out[k];
-            // If we aren't getting the correctly rounded result
-            if (t[j] != q[j])
-            {
-                cl_double test = ((cl_double *)q)[j];
-                long double correct = func.f_fi(s[j], s2[j]);
-                float err = Bruteforce_Ulp_Error_Double(test, correct);
-                int fail = !(fabsf(err) <= ulps);
-                if (fail && ftz)
-                {
-                    // retry per section
-                    if (IsDoubleResultSubnormal(correct, ulps))
-                    {
-                        fail = fail && (test != 0.0f);
-                        if (!fail) err = 0.0f;
-                    }
-                    // retry per section
-                    if (IsDoubleSubnormal(s[j]))
-                    {
-                        long double correct2 = func.f_fi(0.0, s2[j]);
-                        long double correct3 = func.f_fi(-0.0, s2[j]);
-                        float err2 =
-                            Bruteforce_Ulp_Error_Double(test, correct2);
-                        float err3 =
-                            Bruteforce_Ulp_Error_Double(test, correct3);
-                        fail = fail
-                            && ((!(fabsf(err2) <= ulps))
-                                && (!(fabsf(err3) <= ulps)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-                        // retry per section
-                        if (IsDoubleResultSubnormal(correct2, ulps)
-                            || IsDoubleResultSubnormal(correct3, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-                    }
-                }
-                if (fabsf(err) > tinfo->maxError)
-                {
-                    tinfo->maxError = fabsf(err);
-                    tinfo->maxErrorValue = s[j];
-                    tinfo->maxErrorValue2 = s2[j];
-                }
-                if (fail)
-                {
-                    vlog_error("\nERROR: %s%s: %f ulp error at {%.13la, %d}: "
-                               "*%.13la vs. %.13la\n",
-                               name, sizeNames[k], err, s[j], s2[j], r[j],
-                               test);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp
deleted file mode 100644
index e65a9aa..0000000
--- a/test_conformance/math_brute_force/binary_i_float.cpp
+++ /dev/null
@@ -1,740 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <climits>
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global int",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global int* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 i0 = vload3( 0, in2 + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0, i0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       int3 i0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, i0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem inBuf2; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    float maxError; // max error value. Init to 0.
-    double
-        maxErrorValue; // position of the max error value (param 1).  Init to 0.
-    cl_int maxErrorValue2; // position of the max error value (param 2).  Init
-                           // to 0.
-    MTdata d;
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-typedef struct TestInfo
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-    // no special values
-} TestInfo;
-// A table of more difficult cases to get right
-static const float specialValues[] = {
-    -NAN,
-    -FLT_MAX,
-    MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
-    MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
-    MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
-    MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
-    MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8),
-    MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32),
-    MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7),
-    MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31),
-    MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
-    -1000.f,
-    -100.f,
-    -4.0f,
-    -3.5f,
-    -3.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
-    -2.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
-    -2.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
-    -1.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
-    -1.0f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25),
-    -0.5f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26),
-    -0.25f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
-    -FLT_MIN,
-    MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
-    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
-    MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
-    MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
-    MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
-    -0.0f,
-    +NAN,
-    +FLT_MAX,
-    MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
-    MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
-    MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
-    MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
-    MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8),
-    MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32),
-    MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7),
-    MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31),
-    MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
-    +1000.f,
-    +100.f,
-    +4.0f,
-    +3.5f,
-    +3.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
-    2.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
-    +2.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
-    1.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
-    +1.0f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25),
-    +0.5f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26),
-    +0.25f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
-    +FLT_MIN,
-    MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
-    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
-    MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
-    MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
-    MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
-    +0.0f,
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-static const int specialValuesInt[] = {
-    0,           1,           2,           3,          126,        127,
-    128,         0x02000001,  0x04000001,  1465264071, 1488522147, -1,
-    -2,          -3,          -126,        -127,       -128,       -0x02000001,
-    -0x04000001, -1465264071, -1488522147,
-static size_t specialValuesIntCount =
-    sizeof(specialValuesInt) / sizeof(specialValuesInt[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-    cl_int maxErrorVal2 = 0;
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_float));
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-    test_info.f = f;
-    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
-    test_info.ftz =
-        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_float),
-            test_info.subBufferSize * sizeof(cl_float)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        cl_buffer_region region2 = { i * test_info.subBufferSize
-                                         * sizeof(cl_int),
-                                     test_info.subBufferSize * sizeof(cl_int) };
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region2, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-        // Accumulate the arithmetic errors
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
-            }
-        }
-        if (error) goto exit;
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
-    }
-    vlog("\n");
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-        free(test_info.tinfo);
-    }
-    return error;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_float);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    fptr func = job->f->func;
-    int ftz = job->ftz;
-    float ulps = job->ulps;
-    MTdata d = tinfo->d;
-    cl_int error;
-    const char *name = job->f->name;
-    cl_uint *t = 0;
-    cl_float *r = 0;
-    cl_float *s = 0;
-    cl_int *s2 = 0;
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_uint *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-    // Init input array
-    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
-    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
-    size_t idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
-        float *fp = (float *)p;
-        cl_int *ip2 = (cl_int *)p2;
-        uint32_t x, y;
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-        for (; idx < buffer_elements; idx++)
-        {
-            fp[idx] = specialValues[x];
-            ip2[idx] = specialValuesInt[y];
-            ++x;
-            if (x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesIntCount) break;
-            }
-        }
-    }
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        p[idx] = genrand_int32(d);
-        p2[idx] = genrand_int32(d);
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-    // Calculate the correctly rounded reference result
-    r = (float *)gOut_Ref + thread_id * buffer_elements;
-    s = (float *)gIn + thread_id * buffer_elements;
-    s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (float)func.f_fi(s[j], s2[j]);
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-    // Verify data
-    t = (cl_uint *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            cl_uint *q = out[k];
-            // If we aren't getting the correctly rounded result
-            if (t[j] != q[j])
-            {
-                float test = ((float *)q)[j];
-                double correct = func.f_fi(s[j], s2[j]);
-                float err = Ulp_Error(test, correct);
-                int fail = !(fabsf(err) <= ulps);
-                if (fail && ftz)
-                {
-                    // retry per section
-                    if (IsFloatResultSubnormal(correct, ulps))
-                    {
-                        fail = fail && (test != 0.0f);
-                        if (!fail) err = 0.0f;
-                    }
-                    // retry per section
-                    if (IsFloatSubnormal(s[j]))
-                    {
-                        double correct2, correct3;
-                        float err2, err3;
-                        correct2 = func.f_fi(0.0, s2[j]);
-                        correct3 = func.f_fi(-0.0, s2[j]);
-                        err2 = Ulp_Error(test, correct2);
-                        err3 = Ulp_Error(test, correct3);
-                        fail = fail
-                            && ((!(fabsf(err2) <= ulps))
-                                && (!(fabsf(err3) <= ulps)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-                        // retry per section
-                        if (IsFloatResultSubnormal(correct2, ulps)
-                            || IsFloatResultSubnormal(correct3, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-                    }
-                }
-                if (fabsf(err) > tinfo->maxError)
-                {
-                    tinfo->maxError = fabsf(err);
-                    tinfo->maxErrorValue = s[j];
-                    tinfo->maxErrorValue2 = s2[j];
-                }
-                if (fail)
-                {
-                    vlog_error(
-                        "\nERROR: %s%s: %f ulp error at {%a (0x%8.8x), %d}: "
-                        "*%a (0x%8.8x) vs. %a (0x%8.8x) at index: %d\n",
-                        name, sizeNames[k], err, s[j], ((uint32_t *)s)[j],
-                        s2[j], r[j], ((uint32_t *)r)[j], test,
-                        ((cl_uint *)&test)[0], j);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp
deleted file mode 100644
index 21e76c8..0000000
--- a/test_conformance/math_brute_force/binary_operator_double.cpp
+++ /dev/null
@@ -1,795 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *operator_symbol, int vectorSize,
-                       cl_uint kernel_count, cl_kernel *k, cl_program *p,
-                       bool relaxedMode)
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = in1[i] ",
-                        operator_symbol,
-                        " in2[i];\n"
-                        "}\n" };
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global double* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
-        "       d0 = d0 ",
-        operator_symbol,
-        " d1;\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       double3 d1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = d0 ",
-        operator_symbol,
-        " d1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *operator_symbol;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->operator_symbol, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem inBuf2; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    float maxError; // max error value. Init to 0.
-    double
-        maxErrorValue; // position of the max error value (param 1).  Init to 0.
-    double maxErrorValue2; // position of the max error value (param 2).  Init
-                           // to 0.
-    MTdata d;
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-typedef struct TestInfo
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-    bool relaxedMode; // True if the test is being run in relaxed mode, false
-                      // otherwise.
-    // no special fields
-} TestInfo;
-// A table of more difficult cases to get right
-static const double specialValues[] = {
-    -NAN,
-    -DBL_MAX,
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22),
-    -1000.,
-    -100.,
-    -4.0,
-    -3.5,
-    -3.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51),
-    -2.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51),
-    -2.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52),
-    -1.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    -1.0,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53),
-    -0.5,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54),
-    -0.25,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
-    -DBL_MIN,
-    MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
-    -0.0,
-    +NAN,
-    +DBL_MAX,
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22),
-    +1000.0,
-    +100.0,
-    +4.0,
-    +3.5,
-    +3.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51),
-    +2.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51),
-    +2.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
-    +1.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    +1.0,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53),
-    +0.5,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54),
-    +0.25,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
-    +DBL_MIN,
-    MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
-    +0.0,
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
-                                           bool relaxedMode)
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-    double maxErrorVal2 = 0.0;
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_double));
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-    test_info.f = f;
-    test_info.ulps = f->double_ulps;
-    test_info.ftz = f->ftz || gForceFTZ;
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_double),
-            test_info.subBufferSize * sizeof(cl_double)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-        // Accumulate the arithmetic errors
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
-            }
-        }
-        if (error) goto exit;
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
-    }
-    vlog("\n");
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-        free(test_info.tinfo);
-    }
-    return error;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_double);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    float ulps = job->ulps;
-    dptr func = job->f->dfunc;
-    int ftz = job->ftz;
-    MTdata d = tinfo->d;
-    cl_int error;
-    const char *name = job->f->name;
-    cl_ulong *t;
-    cl_double *r;
-    cl_double *s;
-    cl_double *s2;
-    Force64BitFPUPrecision();
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-    // Init input array
-    cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
-    cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
-    cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
-        cl_double *fp = (cl_double *)p;
-        cl_double *fp2 = (cl_double *)p2;
-        uint32_t x, y;
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-        for (; idx < buffer_elements; idx++)
-        {
-            fp[idx] = specialValues[x];
-            fp2[idx] = specialValues[y];
-            if (++x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesCount) break;
-            }
-        }
-    }
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        p[idx] = genrand_int64(d);
-        p2[idx] = genrand_int64(d);
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-    // Calculate the correctly rounded reference result
-    r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
-    s = (cl_double *)gIn + thread_id * buffer_elements;
-    s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (cl_double)func.f_ff(s[j], s2[j]);
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-    // Verify data
-    t = (cl_ulong *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            cl_ulong *q = out[k];
-            // If we aren't getting the correctly rounded result
-            if (t[j] != q[j])
-            {
-                cl_double test = ((cl_double *)q)[j];
-                long double correct = func.f_ff(s[j], s2[j]);
-                float err = Bruteforce_Ulp_Error_Double(test, correct);
-                int fail = !(fabsf(err) <= ulps);
-                if (fail && ftz)
-                {
-                    // retry per section
-                    if (IsDoubleResultSubnormal(correct, ulps))
-                    {
-                        fail = fail && (test != 0.0f);
-                        if (!fail) err = 0.0f;
-                    }
-                    // retry per section
-                    if (IsDoubleSubnormal(s[j]))
-                    {
-                        long double correct2 = func.f_ff(0.0, s2[j]);
-                        long double correct3 = func.f_ff(-0.0, s2[j]);
-                        float err2 =
-                            Bruteforce_Ulp_Error_Double(test, correct2);
-                        float err3 =
-                            Bruteforce_Ulp_Error_Double(test, correct3);
-                        fail = fail
-                            && ((!(fabsf(err2) <= ulps))
-                                && (!(fabsf(err3) <= ulps)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-                        // retry per section
-                        if (IsDoubleResultSubnormal(correct2, ulps)
-                            || IsDoubleResultSubnormal(correct3, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-                        // try with both args as zero
-                        if (IsDoubleSubnormal(s2[j]))
-                        {
-                            correct2 = func.f_ff(0.0, 0.0);
-                            correct3 = func.f_ff(-0.0, 0.0);
-                            long double correct4 = func.f_ff(0.0, -0.0);
-                            long double correct5 = func.f_ff(-0.0, -0.0);
-                            err2 = Bruteforce_Ulp_Error_Double(test, correct2);
-                            err3 = Bruteforce_Ulp_Error_Double(test, correct3);
-                            float err4 =
-                                Bruteforce_Ulp_Error_Double(test, correct4);
-                            float err5 =
-                                Bruteforce_Ulp_Error_Double(test, correct5);
-                            fail = fail
-                                && ((!(fabsf(err2) <= ulps))
-                                    && (!(fabsf(err3) <= ulps))
-                                    && (!(fabsf(err4) <= ulps))
-                                    && (!(fabsf(err5) <= ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            if (fabsf(err4) < fabsf(err)) err = err4;
-                            if (fabsf(err5) < fabsf(err)) err = err5;
-                            // retry per section
-                            if (IsDoubleResultSubnormal(correct2, ulps)
-                                || IsDoubleResultSubnormal(correct3, ulps)
-                                || IsDoubleResultSubnormal(correct4, ulps)
-                                || IsDoubleResultSubnormal(correct5, ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                    else if (IsDoubleSubnormal(s2[j]))
-                    {
-                        long double correct2 = func.f_ff(s[j], 0.0);
-                        long double correct3 = func.f_ff(s[j], -0.0);
-                        float err2 =
-                            Bruteforce_Ulp_Error_Double(test, correct2);
-                        float err3 =
-                            Bruteforce_Ulp_Error_Double(test, correct3);
-                        fail = fail
-                            && ((!(fabsf(err2) <= ulps))
-                                && (!(fabsf(err3) <= ulps)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-                        // retry per section
-                        if (IsDoubleResultSubnormal(correct2, ulps)
-                            || IsDoubleResultSubnormal(correct3, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-                    }
-                }
-                if (fabsf(err) > tinfo->maxError)
-                {
-                    tinfo->maxError = fabsf(err);
-                    tinfo->maxErrorValue = s[j];
-                    tinfo->maxErrorValue2 = s2[j];
-                }
-                if (fail)
-                {
-                    vlog_error(
-                        "\nERROR: %s%s: %f ulp error at {%a, %a}: *%a vs. %a\n",
-                        name, sizeNames[k], err, s[j], s2[j], r[j], test);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp
deleted file mode 100644
index ccaef60..0000000
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ /dev/null
@@ -1,925 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *operator_symbol, int vectorSize,
-                       cl_uint kernel_count, cl_kernel *k, cl_program *p,
-                       bool relaxedMode)
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = in1[i] ",
-                        operator_symbol,
-                        " in2[i];\n"
-                        "}\n" };
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global float* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       f0 = f0 ",
-        operator_symbol,
-        " f1;\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = f0 ",
-        operator_symbol,
-        " f1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *operator_symbol;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->operator_symbol, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem inBuf2; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    float maxError; // max error value. Init to 0.
-    double
-        maxErrorValue; // position of the max error value (param 1).  Init to 0.
-    double maxErrorValue2; // position of the max error value (param 2).  Init
-                           // to 0.
-    MTdata d;
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-typedef struct TestInfo
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-    bool relaxedMode; // True if the test is being run in relaxed mode, false
-                      // otherwise.
-    // no special fields
-} TestInfo;
-// A table of more difficult cases to get right
-static const float specialValues[] = {
-    -NAN,
-    -FLT_MAX,
-    MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
-    MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
-    MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
-    MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
-    MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8),
-    MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32),
-    MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7),
-    MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31),
-    MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
-    -1000.f,
-    -100.f,
-    -4.0f,
-    -3.5f,
-    -3.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
-    -2.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
-    -2.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
-    -1.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
-    -1.0f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25),
-    -0.5f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26),
-    -0.25f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
-    -FLT_MIN,
-    MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
-    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
-    MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
-    MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
-    MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
-    -0.0f,
-    +NAN,
-    +FLT_MAX,
-    MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
-    MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
-    MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
-    MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
-    MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8),
-    MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32),
-    MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7),
-    MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31),
-    MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
-    +1000.f,
-    +100.f,
-    +4.0f,
-    +3.5f,
-    +3.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
-    2.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
-    +2.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
-    1.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
-    +1.0f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25),
-    +0.5f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26),
-    +0.25f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
-    +FLT_MIN,
-    MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
-    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
-    MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
-    MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
-    MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
-    +0.0f,
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
-                                        bool relaxedMode)
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-    double maxErrorVal2 = 0.0;
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_float));
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-    test_info.f = f;
-    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
-    test_info.ftz =
-        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    test_info.relaxedMode = relaxedMode;
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_float),
-            test_info.subBufferSize * sizeof(cl_float)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-        // Accumulate the arithmetic errors
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
-            }
-        }
-        if (error) goto exit;
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
-    }
-    vlog("\n");
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-        free(test_info.tinfo);
-    }
-    return error;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_float);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    fptr func = job->f->func;
-    int ftz = job->ftz;
-    bool relaxedMode = job->relaxedMode;
-    float ulps = getAllowedUlpError(job->f, relaxedMode);
-    MTdata d = tinfo->d;
-    cl_int error;
-    cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
-    const char *name = job->f->name;
-    cl_uint *t = 0;
-    cl_float *r = 0;
-    cl_float *s = 0;
-    cl_float *s2 = 0;
-    RoundingMode oldRoundMode;
-    if (relaxedMode)
-    {
-        func = job->f->rfunc;
-    }
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_uint *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-    // Init input array
-    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
-    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
-    cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    {
-        // Insert special values
-        uint32_t x, y;
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-        for (; idx < buffer_elements; idx++)
-        {
-            p[idx] = ((cl_uint *)specialValues)[x];
-            p2[idx] = ((cl_uint *)specialValues)[y];
-            ++x;
-            if (x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesCount) break;
-            }
-            if (relaxedMode && strcmp(name, "divide") == 0)
-            {
-                cl_uint pj = p[idx] & 0x7fffffff;
-                cl_uint p2j = p2[idx] & 0x7fffffff;
-                // Replace values outside [2^-62, 2^62] with QNaN
-                if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
-                if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
-            }
-        }
-    }
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        p[idx] = genrand_int32(d);
-        p2[idx] = genrand_int32(d);
-        if (relaxedMode && strcmp(name, "divide") == 0)
-        {
-            cl_uint pj = p[idx] & 0x7fffffff;
-            cl_uint p2j = p2[idx] & 0x7fffffff;
-            // Replace values outside [2^-62, 2^62] with QNaN
-            if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
-            if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
-        }
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-    if (gSkipCorrectnessTesting)
-    {
-        free(overflow);
-        return CL_SUCCESS;
-    }
-    // Calculate the correctly rounded reference result
-    FPU_mode_type oldMode;
-    memset(&oldMode, 0, sizeof(oldMode));
-    if (ftz) ForceFTZ(&oldMode);
-    // Set the rounding mode to match the device
-    oldRoundMode = kRoundToNearestEven;
-    if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
-    // Calculate the correctly rounded reference result
-    r = (float *)gOut_Ref + thread_id * buffer_elements;
-    s = (float *)gIn + thread_id * buffer_elements;
-    s2 = (float *)gIn2 + thread_id * buffer_elements;
-    if (gInfNanSupport)
-    {
-        for (size_t j = 0; j < buffer_elements; j++)
-            r[j] = (float)func.f_ff(s[j], s2[j]);
-    }
-    else
-    {
-        for (size_t j = 0; j < buffer_elements; j++)
-        {
-            feclearexcept(FE_OVERFLOW);
-            r[j] = (float)func.f_ff(s[j], s2[j]);
-            overflow[j] =
-                FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
-        }
-    }
-    if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
-    if (ftz) RestoreFPState(&oldMode);
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-    // Verify data
-    t = (cl_uint *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            cl_uint *q = out[k];
-            // If we aren't getting the correctly rounded result
-            if (t[j] != q[j])
-            {
-                float test = ((float *)q)[j];
-                double correct = func.f_ff(s[j], s2[j]);
-                // Per section 10 paragraph 6, accept any result if an input or
-                // output is a infinity or NaN or overflow
-                if (!gInfNanSupport)
-                {
-                    // Note: no double rounding here.  Reference functions
-                    // calculate in single precision.
-                    if (overflow[j] || IsFloatInfinity(correct)
-                        || IsFloatNaN(correct) || IsFloatInfinity(s2[j])
-                        || IsFloatNaN(s2[j]) || IsFloatInfinity(s[j])
-                        || IsFloatNaN(s[j]))
-                        continue;
-                }
-                // Per section 10 paragraph 6, accept embedded devices always
-                // returning positive 0.0.
-                if (gIsEmbedded && (t[j] == 0x80000000) && (q[j] == 0x00000000))
-                    continue;
-                float err = Ulp_Error(test, correct);
-                float errB = Ulp_Error(test, (float)correct);
-                int fail =
-                    ((!(fabsf(err) <= ulps)) && (!(fabsf(errB) <= ulps)));
-                if (fabsf(errB) < fabsf(err)) err = errB;
-                if (fail && ftz)
-                {
-                    // retry per section
-                    if (IsFloatResultSubnormal(correct, ulps))
-                    {
-                        fail = fail && (test != 0.0f);
-                        if (!fail) err = 0.0f;
-                    }
-                    // retry per section
-                    if (IsFloatSubnormal(s[j]))
-                    {
-                        double correct2, correct3;
-                        float err2, err3;
-                        if (!gInfNanSupport) feclearexcept(FE_OVERFLOW);
-                        correct2 = func.f_ff(0.0, s2[j]);
-                        correct3 = func.f_ff(-0.0, s2[j]);
-                        // Per section 10 paragraph 6, accept any result if an
-                        // input or output is a infinity or NaN or overflow
-                        if (!gInfNanSupport)
-                        {
-                            if (fetestexcept(FE_OVERFLOW)) continue;
-                            // Note: no double rounding here.  Reference
-                            // functions calculate in single precision.
-                            if (IsFloatInfinity(correct2)
-                                || IsFloatNaN(correct2)
-                                || IsFloatInfinity(correct3)
-                                || IsFloatNaN(correct3))
-                                continue;
-                        }
-                        err2 = Ulp_Error(test, correct2);
-                        err3 = Ulp_Error(test, correct3);
-                        fail = fail
-                            && ((!(fabsf(err2) <= ulps))
-                                && (!(fabsf(err3) <= ulps)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-                        // retry per section
-                        if (IsFloatResultSubnormal(correct2, ulps)
-                            || IsFloatResultSubnormal(correct3, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-                        // try with both args as zero
-                        if (IsFloatSubnormal(s2[j]))
-                        {
-                            double correct4, correct5;
-                            float err4, err5;
-                            if (!gInfNanSupport) feclearexcept(FE_OVERFLOW);
-                            correct2 = func.f_ff(0.0, 0.0);
-                            correct3 = func.f_ff(-0.0, 0.0);
-                            correct4 = func.f_ff(0.0, -0.0);
-                            correct5 = func.f_ff(-0.0, -0.0);
-                            // Per section 10 paragraph 6, accept any result if
-                            // an input or output is a infinity or NaN or
-                            // overflow
-                            if (!gInfNanSupport)
-                            {
-                                if (fetestexcept(FE_OVERFLOW)) continue;
-                                // Note: no double rounding here.  Reference
-                                // functions calculate in single precision.
-                                if (IsFloatInfinity(correct2)
-                                    || IsFloatNaN(correct2)
-                                    || IsFloatInfinity(correct3)
-                                    || IsFloatNaN(correct3)
-                                    || IsFloatInfinity(correct4)
-                                    || IsFloatNaN(correct4)
-                                    || IsFloatInfinity(correct5)
-                                    || IsFloatNaN(correct5))
-                                    continue;
-                            }
-                            err2 = Ulp_Error(test, correct2);
-                            err3 = Ulp_Error(test, correct3);
-                            err4 = Ulp_Error(test, correct4);
-                            err5 = Ulp_Error(test, correct5);
-                            fail = fail
-                                && ((!(fabsf(err2) <= ulps))
-                                    && (!(fabsf(err3) <= ulps))
-                                    && (!(fabsf(err4) <= ulps))
-                                    && (!(fabsf(err5) <= ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            if (fabsf(err4) < fabsf(err)) err = err4;
-                            if (fabsf(err5) < fabsf(err)) err = err5;
-                            // retry per section
-                            if (IsFloatResultSubnormal(correct2, ulps)
-                                || IsFloatResultSubnormal(correct3, ulps)
-                                || IsFloatResultSubnormal(correct4, ulps)
-                                || IsFloatResultSubnormal(correct5, ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                    else if (IsFloatSubnormal(s2[j]))
-                    {
-                        double correct2, correct3;
-                        float err2, err3;
-                        if (!gInfNanSupport) feclearexcept(FE_OVERFLOW);
-                        correct2 = func.f_ff(s[j], 0.0);
-                        correct3 = func.f_ff(s[j], -0.0);
-                        // Per section 10 paragraph 6, accept any result if an
-                        // input or output is a infinity or NaN or overflow
-                        if (!gInfNanSupport)
-                        {
-                            // Note: no double rounding here.  Reference
-                            // functions calculate in single precision.
-                            if (overflow[j] || IsFloatInfinity(correct)
-                                || IsFloatNaN(correct)
-                                || IsFloatInfinity(correct2)
-                                || IsFloatNaN(correct2))
-                                continue;
-                        }
-                        err2 = Ulp_Error(test, correct2);
-                        err3 = Ulp_Error(test, correct3);
-                        fail = fail
-                            && ((!(fabsf(err2) <= ulps))
-                                && (!(fabsf(err3) <= ulps)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-                        // retry per section
-                        if (IsFloatResultSubnormal(correct2, ulps)
-                            || IsFloatResultSubnormal(correct3, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-                    }
-                }
-                if (fabsf(err) > tinfo->maxError)
-                {
-                    tinfo->maxError = fabsf(err);
-                    tinfo->maxErrorValue = s[j];
-                    tinfo->maxErrorValue2 = s2[j];
-                }
-                if (fail)
-                {
-                    vlog_error("\nERROR: %s%s: %f ulp error at {%a, %a}: *%a "
-                               "vs. %a (0x%8.8x) at index: %d\n",
-                               name, sizeNames[k], err, s[j], s2[j], r[j], test,
-                               ((cl_uint *)&test)[0], j);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-    if (overflow) free(overflow);
-    return error;
diff --git a/test_conformance/math_brute_force/binary_two_results_i.cpp b/test_conformance/math_brute_force/binary_two_results_i.cpp
new file mode 100644
index 0000000..91cebf5
--- /dev/null
+++ b/test_conformance/math_brute_force/binary_two_results_i.cpp
@@ -0,0 +1,1132 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "Utility.h"
+#include <limits.h>
+#include <string.h>
+#include "FunctionList.h"
+int TestFunc_FloatI_Float_Float(const Func *f, MTdata);
+int TestFunc_DoubleI_Double_Double(const Func *f, MTdata);
+extern const vtbl _binary_two_results_i = { "binary_two_results_i",
+                                            TestFunc_FloatI_Float_Float,
+                                            TestFunc_DoubleI_Double_Double };
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+    const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in1[i], in2[i], out2 + i );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global int* out2, __global float* in, __global float* in2)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       float3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
+                            "       int3 i0 = 0xdeaddead;\n"
+                            "       f0 = ", name, "( f0, f1, &i0 );\n"
+                            "       vstore3( f0, 0, out + 3*i );\n"
+                            "       vstore3( i0, 0, out2 + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       float3 f0, f1;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+                            "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       int3 i0 = 0xdeaddead;\n"
+                            "       f0 = ", name, "( f0, f1, &i0 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = f0.y; \n"
+                            "               out2[3*i+1] = i0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = f0.x; \n"
+                            "               out2[3*i] = i0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p);
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in1[i], in2[i], out2 + i );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global int* out2, __global double* in, __global double* in2)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       double3 d0 = vload3( 0, in + 3 * i );\n"
+                            "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
+                            "       int3 i0 = 0xdeaddead;\n"
+                            "       d0 = ", name, "( d0, d1, &i0 );\n"
+                            "       vstore3( d0, 0, out + 3*i );\n"
+                            "       vstore3( i0, 0, out2 + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       double3 d0, d1;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
+                            "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       int3 i0 = 0xdeaddead;\n"
+                            "       d0 = ", name, "( d0, d1, &i0 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = d0.y; \n"
+                            "               out2[3*i+1] = i0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = d0.x; \n"
+                            "               out2[3*i] = i0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p);
+typedef struct BuildKernelInfo
+    cl_uint     offset;            // the first vector size to build
+    cl_kernel   *kernels;
+    cl_program  *programs;
+    const char  *nameInCode;
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i );
+typedef struct ComputeReferenceInfoF_
+    const float *x;
+    const float *y;
+    float *r;
+    int *i;
+    double (*f_ffpI)(double, double, int*);
+    cl_uint lim;
+    cl_uint count;
+} ComputeReferenceInfoF;
+typedef struct ComputeReferenceInfoD_
+    const double *x;
+    const double *y;
+    double *r;
+    int *i;
+    long double (*f_ffpI)(long double, long double, int*);
+    cl_uint lim;
+    cl_uint count;
+} ComputeReferenceInfoD;
+static cl_int
+ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
+    ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo;
+    cl_uint lim = cri->lim;
+    cl_uint count = cri->count;
+    cl_uint off = jid * count;
+    const float *x = cri->x + off;
+    const float *y = cri->y + off;
+    float *r = cri->r + off;
+    int *i = cri->i + off;
+    double (*f)(double, double, int *) = cri->f_ffpI;
+    cl_uint j;
+    if (off + count > lim)
+    count = lim - off;
+    for (j = 0; j < count; ++j)
+    r[j] = (float)f((double)x[j], (double)y[j], i + j);
+    return CL_SUCCESS;
+static cl_int
+ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
+    ComputeReferenceInfoD *cri = (ComputeReferenceInfoD *)userInfo;
+    cl_uint lim = cri->lim;
+    cl_uint count = cri->count;
+    cl_uint off = jid * count;
+    const double *x = cri->x + off;
+    const double *y = cri->y + off;
+    double *r = cri->r + off;
+    int *i = cri->i + off;
+    long double (*f)(long double, long double, int *) = cri->f_ffpI;
+    cl_uint j;
+    if (off + count > lim)
+    count = lim - off;
+    Force64BitFPUPrecision();
+    for (j = 0; j < count; ++j)
+    r[j] = (double)f((long double)x[j], (long double)y[j], i + j);
+    return CL_SUCCESS;
+int TestFunc_FloatI_Float_Float(const Func *f, MTdata d) /*
+ * Brute-force check of a function taking two floats and producing a float plus an int.
+ * For each pass it fills gIn/gIn2 with random 32-bit patterns, runs the kernel for every
+ * vector size in [gMinVectorSizeIndex, gMaxVectorSizeIndex), computes the host reference
+ * with f->func.f_ffpI, and compares both outputs; optionally it then times the kernels.
+ *   f - function descriptor; name, nameInCode, ftz, float_ulps, float_embedded_ulps and
+ *       func.f_ffpI are read here.
+ *   d - random generator state handed to genrand_int32.
+ * Returns the value left in `error`: a non-zero code from a failing call, and -1 is stored
+ * before `goto exit` when verification fails (the exit label is not visible in this text).
+ */
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;  // largest |ulp error| seen on the float result
+    float float_ulps;  // allowed ulp error, chosen below
+    int64_t maxError2 = 0;  // largest |difference| seen on the integer result
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);  // flush-to-zero retries apply if any of these holds
+    float maxErrorVal = 0.0f;  // first input at which maxError was observed
+    float maxErrorVal2 = 0.0f;  // first input at which maxError2 was observed; NOTE(review): written below but never read in this function
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( float );  // increment of the outer loop counter: one buffer of floats per pass
+    cl_uint threadCount = GetThreadCount();
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    if(gWimpyMode ){
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);  // wimpy mode replaces the step, so fewer passes are made
+    }
+    if( gIsEmbedded )
+        float_ulps = f->float_embedded_ulps;
+    else
+        float_ulps = f->float_ulps;
+    int testingRemquo = !strcmp(f->name, "remquo");  // remquo gets a masked integer comparison below
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            return error;
+    }
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )  // NOTE(review): this appears to rebuild every kernel the ThreadPool_Do call above was already given -- confirm whether this loop is meant to be disabled
+        if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+    for( i = 0; i < (1ULL<<32); i += step )  // one pass per step until the counter reaches 2^32
+    {
+        //Init input array
+        cl_uint *p = (cl_uint *)gIn;
+        cl_uint *p2 = (cl_uint *)gIn2;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            p[j] = genrand_int32(d);  // random 32-bit pattern, later read back as a float through s
+            p2[j] = genrand_int32(d);
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))  // non-blocking write
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;  // NOTE(review): direct return bypasses the kernel/program release loop at the end of the function
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_TRUE, 0, bufferSize, gIn2, 0, NULL, NULL) ))  // blocking write
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            return error;
+        }
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;  // NOTE(review): no exit label is visible in this text; presumably it sits before the release loop -- confirm
+            }
+            memset_pattern4(gOut2[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )  // localCount is passed as the global work size; the local size is NULL
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );  // only logged; execution continues
+        // Calculate the correctly rounded reference result
+        float *s = (float *)gIn;
+        float *s2 = (float *)gIn2;
+    if (threadCount > 1) {  // split the reference computation into threadCount jobs
+        ComputeReferenceInfoF cri;
+        cri.x = s;
+        cri.y = s2;
+        cri.r = (float *)gOut_Ref;
+        cri.i = (int *)gOut_Ref2;
+        cri.f_ffpI = f->func.f_ffpI;
+        cri.lim = bufferSize / sizeof( float );
+        cri.count = (cri.lim + threadCount - 1) / threadCount;  // elements per job, rounded up
+        ThreadPool_Do(ReferenceF, threadCount, &cri);  // NOTE(review): the return value is not checked here
+    } else {
+            float *r = (float *)gOut_Ref;
+            int *r2 = (int *)gOut_Ref2;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                r[j] = (float) f->func.f_ffpI( s[j], s2[j], r2+j );
+    }
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )  // blocking read
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray2 failed %d\n", error );
+                goto exit;
+            }
+        }
+        if( gSkipCorrectnessTesting )
+            break;  // leaves the pass loop after a single pass
+        //Verify data
+        uint32_t *t = (uint32_t *)gOut_Ref;  // reference float results viewed as raw bits
+        int32_t *t2 = (int32_t *)gOut_Ref2;  // reference integer results
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = (uint32_t *)gOut[k];
+                int32_t *q2 = (int32_t *)gOut2[k];
+                // Check for exact match to correctly rounded result
+        if (t[j] == q[j] && t2[j] == q2[j])
+            continue;
+        // Check for paired NaNs
+        if ((t[j] & 0x7fffffff) > 0x7f800000 && (q[j] & 0x7fffffff) > 0x7f800000 && t2[j] == q2[j])  // both magnitudes above the infinity bit pattern, integers equal
+            continue;
+                // if( t[j] != q[j] || t2[j] != q2[j] )
+                {
+                    float test = ((float*) q)[j];
+                    int correct2 = INT_MIN;
+                    double correct = f->func.f_ffpI( s[j], s2[j], &correct2 );  // recompute the reference in double for the ulp measurement
+                    float err = Ulp_Error( test, correct );
+                    int64_t iErr;
+                    // in case of remquo, we only care about the sign and last seven bits of
+                    // integer as per the spec.
+                    if(testingRemquo)
+                        iErr = (long long) (q2[j] & 0x0000007f) - (long long) (correct2 & 0x0000007f);
+                    else
+                        iErr = (long long) q2[j] - (long long) correct2;
+                    //For remquo, if y = 0, x is infinite, or either is NaN then the standard either neglects
+                    //to say what is returned in iptr or leaves it undefined or implementation defined.
+                    int iptrUndefined = fabs(((float*) gIn)[j]) == INFINITY ||
+                                        ((float*) gIn2)[j] == 0.0f          ||
+                                        isnan(((float*) gIn2)[j])           ||
+                                        isnan(((float*) gIn)[j]);
+                    if(iptrUndefined)
+                         iErr = 0;  // integer mismatch is ignored for these inputs (applied for every function, not only remquo)
+                    int fail = ! (fabsf(err) <= float_ulps && iErr == 0 );
+                    if( ftz && fail )
+                    {
+                        // retry: when the helper flags the reference result as subnormal, a 0.0f result with iErr == 0 is accepted
+                        if( IsFloatResultSubnormal(correct, float_ulps ) )
+                        {
+                            fail = fail && ! ( test == 0.0f && iErr == 0 );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                        // retry with the first input replaced by +0.0 and -0.0 when IsFloatSubnormal( s[j] ) holds
+                        if( IsFloatSubnormal( s[j] ) )
+                        {
+                            int correct3i, correct4i;
+                            double correct3 = f->func.f_ffpI( 0.0, s2[j], &correct3i );
+                            double correct4 = f->func.f_ffpI( -0.0, s2[j], &correct4i );
+                            float err2 = Ulp_Error( test, correct3  );
+                            float err3 = Ulp_Error( test, correct4  );
+                            int64_t iErr3 = (long long) q2[j] - (long long) correct3i;  // NOTE(review): unlike iErr above, no 7-bit mask is applied for remquo here -- confirm intent
+                            int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
+                            fail =  fail && ((!(fabsf(err2) <= float_ulps && iErr3 == 0)) && (!(fabsf(err3) <= float_ulps && iErr4 == 0)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            if( llabs(iErr3) < llabs( iErr ) )
+                                iErr = iErr3;
+                            if( llabs(iErr4) < llabs( iErr ) )
+                                iErr = iErr4;
+                            // retry: accept 0.0f when a flushed-input reference is flagged as subnormal
+                            if( IsFloatResultSubnormal(correct2, float_ulps ) || IsFloatResultSubnormal(correct3, float_ulps ) )  // NOTE(review): correct2 is the int result; correct3/correct4 look like the intended pair -- confirm
+                            {
+                                fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) );
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                            //try with both args as zero
+                            if( IsFloatSubnormal( s2[j] ) )
+                            {
+                                int correct7i, correct8i;
+                                correct3 = f->func.f_ffpI( 0.0, 0.0, &correct3i );
+                                correct4 = f->func.f_ffpI( -0.0, 0.0, &correct4i );
+                                double correct7 = f->func.f_ffpI( 0.0, -0.0, &correct7i );
+                                double correct8 = f->func.f_ffpI( -0.0, -0.0, &correct8i );
+                                err2 = Ulp_Error( test, correct3  );
+                                err3 = Ulp_Error( test, correct4  );
+                                float err4 = Ulp_Error( test, correct7  );
+                                float err5 = Ulp_Error( test, correct8  );
+                                iErr3 = (long long) q2[j] - (long long) correct3i;
+                                iErr4 = (long long) q2[j] - (long long) correct4i;
+                                int64_t iErr7 = (long long) q2[j] - (long long) correct7i;
+                                int64_t iErr8 = (long long) q2[j] - (long long) correct8i;
+                                fail =  fail && ((!(fabsf(err2) <= float_ulps && iErr3 == 0)) && (!(fabsf(err3) <= float_ulps  && iErr4 == 0)) &&
+                                                 (!(fabsf(err4) <= float_ulps  && iErr7 == 0)) && (!(fabsf(err5) <= float_ulps  && iErr8 == 0)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                if( llabs(iErr3) < llabs( iErr ) )
+                                    iErr = iErr3;
+                                if( llabs(iErr4) < llabs( iErr ) )
+                                    iErr = iErr4;
+                                if( llabs(iErr7) < llabs( iErr ) )
+                                    iErr = iErr7;
+                                if( llabs(iErr8) < llabs( iErr ) )
+                                    iErr = iErr8;
+                                // retry: accept 0.0f when any of the four signed-zero references is flagged as subnormal
+                                if( IsFloatResultSubnormal(correct3, float_ulps ) || IsFloatResultSubnormal(correct4, float_ulps )  ||
+                                    IsFloatResultSubnormal(correct7, float_ulps ) || IsFloatResultSubnormal(correct8, float_ulps ) )
+                                {
+                                    fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0 || iErr7 == 0 || iErr8 == 0));
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( IsFloatSubnormal( s2[j] ) )  // only the second input qualifies: retry with it replaced by +0.0 and -0.0
+                        {
+                            int correct3i, correct4i;
+                            double correct3 = f->func.f_ffpI( s[j], 0.0, &correct3i );
+                            double correct4 = f->func.f_ffpI( s[j], -0.0, &correct4i );
+                            float err2 = Ulp_Error( test, correct3  );
+                            float err3 = Ulp_Error( test, correct4  );
+                            int64_t iErr3 = (long long) q2[j] - (long long) correct3i;
+                            int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
+                            fail =  fail && ((!(fabsf(err2) <= float_ulps && iErr3 == 0)) && (!(fabsf(err3) <= float_ulps && iErr4 == 0)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            if( llabs(iErr3) < llabs( iErr ) )
+                                iErr = iErr3;
+                            if( llabs(iErr4) < llabs( iErr ) )
+                                iErr = iErr4;
+                            // retry: accept 0.0f when a flushed-input reference is flagged as subnormal
+                            if( IsFloatResultSubnormal(correct2, float_ulps ) || IsFloatResultSubnormal(correct3, float_ulps ) )  // NOTE(review): correct2 is the int result; correct3/correct4 look like the intended pair -- confirm
+                            {
+                                fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) );
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError )  // track the worst float error and the input that produced it
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                    }
+                    if( llabs(iErr) > maxError2 )  // track the worst integer error
+                    {
+                        maxError2 = llabs(iErr );
+                        maxErrorVal2 = s[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %s%s: {%f, %lld} ulp error at {%a, %a} ({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, 0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n",
+                                    f->name, sizeNames[k], err, iErr,
+                                   ((float*) gIn)[j], ((float*) gIn2)[j],
+                                   ((cl_uint*) gIn)[j], ((cl_uint*) gIn2)[j],
+                                   ((float*) gOut_Ref)[j], ((int*) gOut_Ref2)[j],
+                                   ((cl_uint*) gOut_Ref)[j], ((cl_uint*) gOut_Ref2)[j],
+                                   test, q2[j],
+                                   ((cl_uint*)&test)[0], ((cl_uint*) q2)[j] );
+                      error = -1;  // verification failure is reported as -1
+                      goto exit;
+                    }
+                }
+            }
+        }
+        if( 0 == (i & 0x0fffffff) )  // progress output whenever the low 28 bits of the counter are zero
+        {
+           if (gVerboseBruteForce)
+           {
+               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);  // NOTE(review): i and step are uint64_t but are formatted with %u / %zu -- confirm whether vlog is printf-style
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )  // optional timing section
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            p[j] = genrand_int32(d);  // only gIn is refilled; gIn2 keeps whatever it last held
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;  // NOTE(review): direct return bypasses the release loop below
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            double sum = 0.0;  // total of all timed runs, used for the average
+            double bestTime = INFINITY;  // shortest single run
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;  // report the mean instead of the minimum
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal );  // summary: worst float ulp error, worst integer error, and the input for the former
+    vlog( "\n" );
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+    return error;
+int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+    int64_t maxError2 = 0;
+    int ftz = f->ftz || gForceFTZ;
+    double maxErrorVal = 0.0f;
+    double maxErrorVal2 = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( double );
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    if(gWimpyMode ){
+       step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    cl_uint threadCount = GetThreadCount();
+    Force64BitFPUPrecision();
+    int testingRemquo = !strcmp(f->name, "remquo");
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_DoubleFn,
+                                    gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                    &build_info ) ))
+        {
+            return error;
+        }
+    }
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        double *p2 = (double *)gIn2;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = DoubleFromUInt32(genrand_int32(d));
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_TRUE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            return error;
+        }
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+            memset_pattern4(gOut2[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+        //Calculate the correctly rounded reference result
+        double *s = (double *)gIn;
+        double *s2 = (double *)gIn2;
+    if (threadCount > 1) {
+        ComputeReferenceInfoD cri;
+        cri.x = s;
+        cri.y = s2;
+        cri.r = (double *)gOut_Ref;
+        cri.i = (int *)gOut_Ref2;
+        cri.f_ffpI = f->dfunc.f_ffpI;
+        cri.lim = bufferSize / sizeof( double );
+        cri.count = (cri.lim + threadCount - 1) / threadCount;
+        ThreadPool_Do(ReferenceD, threadCount, &cri);
+    } else {
+            double *r = (double *)gOut_Ref;
+            int *r2 = (int *)gOut_Ref2;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+                r[j] = (double) f->dfunc.f_ffpI( s[j], s2[j], r2+j );
+    }
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray2 failed %d\n", error );
+                goto exit;
+            }
+        }
+        if( gSkipCorrectnessTesting )
+            break;
+        //Verify data
+        uint64_t *t = (uint64_t *)gOut_Ref;
+        int32_t *t2 = (int32_t *)gOut_Ref2;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint64_t *q = (uint64_t *)gOut[k];
+                int32_t *q2 = (int32_t *)gOut2[k];
+        // Check for exact match to correctly rounded result
+        if (t[j] == q[j] && t2[j] == q2[j])
+            continue;
+        // Check for paired NaNs
+        if ((t[j] & 0x7fffffffffffffffUL) > 0x7ff0000000000000UL &&
+            (q[j] & 0x7fffffffffffffffUL) > 0x7ff0000000000000UL &&
+            t2[j] == q2[j])
+            continue;
+                // if( t[j] != q[j] || t2[j] != q2[j] )
+                {
+                    double test = ((double*) q)[j];
+                    int correct2 = INT_MIN;
+                    long double correct = f->dfunc.f_ffpI( s[j], s2[j], &correct2 );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
+                    int64_t iErr;
+                    // in case of remquo, we only care about the sign and last seven bits of
+                    // integer as per the spec.
+                    if(testingRemquo)
+                        iErr = (long long) (q2[j] & 0x0000007f) - (long long) (correct2 & 0x0000007f);
+                    else
+                        iErr = (long long) q2[j] - (long long) correct2;
+                    //For remquo, if y = 0, x is infinite, or either is NaN then the standard either neglects
+                    //to say what is returned in iptr or leaves it undefined or implementation defined.
+                    int iptrUndefined = fabs(((double*) gIn)[j]) == INFINITY ||
+                                        ((double*) gIn2)[j] == 0.0          ||
+                                        isnan(((double*) gIn2)[j])           ||
+                                        isnan(((double*) gIn)[j]);
+                    if(iptrUndefined)
+                         iErr = 0;
+                    int fail = ! (fabsf(err) <= f->double_ulps && iErr == 0 );
+                    if( ftz && fail )
+                    {
+                        // retry per section
+                        if( IsDoubleResultSubnormal(correct, f->double_ulps ) )
+                        {
+                            fail = fail && ! ( test == 0.0f && iErr == 0 );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                        // retry per section
+                        if( IsDoubleSubnormal( s[j] ) )
+                        {
+                            int correct3i, correct4i;
+                            long double correct3 = f->dfunc.f_ffpI( 0.0, s2[j], &correct3i );
+                            long double correct4 = f->dfunc.f_ffpI( -0.0, s2[j], &correct4i );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                            int64_t iErr3 = (long long) q2[j] - (long long) correct3i;
+                            int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            if( llabs(iErr3) < llabs( iErr ) )
+                                iErr = iErr3;
+                            if( llabs(iErr4) < llabs( iErr ) )
+                                iErr = iErr4;
+                            // retry per section
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            {
+                                fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) );
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                            //try with both args as zero
+                            if( IsDoubleSubnormal( s2[j] ) )
+                            {
+                                int correct7i, correct8i;
+                                correct3 = f->dfunc.f_ffpI( 0.0, 0.0, &correct3i );
+                                correct4 = f->dfunc.f_ffpI( -0.0, 0.0, &correct4i );
+                                long double correct7 = f->dfunc.f_ffpI( 0.0, -0.0, &correct7i );
+                                long double correct8 = f->dfunc.f_ffpI( -0.0, -0.0, &correct8i );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct7  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct8  );
+                                iErr3 = (long long) q2[j] - (long long) correct3i;
+                                iErr4 = (long long) q2[j] - (long long) correct4i;
+                                int64_t iErr7 = (long long) q2[j] - (long long) correct7i;
+                                int64_t iErr8 = (long long) q2[j] - (long long) correct8i;
+                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps  && iErr4 == 0)) &&
+                                                 (!(fabsf(err4) <= f->double_ulps  && iErr7 == 0)) && (!(fabsf(err5) <= f->double_ulps  && iErr8 == 0)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                if( llabs(iErr3) < llabs( iErr ) )
+                                    iErr = iErr3;
+                                if( llabs(iErr4) < llabs( iErr ) )
+                                    iErr = iErr4;
+                                if( llabs(iErr7) < llabs( iErr ) )
+                                    iErr = iErr7;
+                                if( llabs(iErr8) < llabs( iErr ) )
+                                    iErr = iErr8;
+                                // retry per section
+                                if( IsDoubleResultSubnormal( correct3, f->double_ulps ) || IsDoubleResultSubnormal( correct4, f->double_ulps )  ||
+                                    IsDoubleResultSubnormal( correct7, f->double_ulps ) || IsDoubleResultSubnormal( correct8, f->double_ulps ) )
+                                {
+                                    fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0 || iErr7 == 0 || iErr8 == 0));
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( IsDoubleSubnormal( s2[j] ) )
+                        {
+                            int correct3i, correct4i;
+                            long double correct3 = f->dfunc.f_ffpI( s[j], 0.0, &correct3i );
+                            long double correct4 = f->dfunc.f_ffpI( s[j], -0.0, &correct4i );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                            int64_t iErr3 = (long long) q2[j] - (long long) correct3i;
+                            int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            if( llabs(iErr3) < llabs( iErr ) )
+                                iErr = iErr3;
+                            if( llabs(iErr4) < llabs( iErr ) )
+                                iErr = iErr4;
+                            // retry per section
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            {
+                                fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) );
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                    }
+                    if( llabs(iErr) > maxError2 )
+                    {
+                        maxError2 = llabs(iErr );
+                        maxErrorVal2 = s[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %sD%s: {%f, %lld} ulp error at {%.13la, %.13la} ({ 0x%16.16llx, 0x%16.16llx}): *{%.13la, %d} ({ 0x%16.16llx, 0x%8.8x}) vs. {%.13la, %d} ({ 0x%16.16llx, 0x%8.8x})\n",
+                                    f->name, sizeNames[k], err, iErr,
+                                   ((double*) gIn)[j], ((double*) gIn2)[j],
+                                   ((cl_ulong*) gIn)[j], ((cl_ulong*) gIn2)[j],
+                                   ((double*) gOut_Ref)[j], ((int*) gOut_Ref2)[j],
+                                   ((cl_ulong*) gOut_Ref)[j], ((cl_uint*) gOut_Ref2)[j],
+                                   test, q2[j],
+                                   ((cl_ulong*) q)[j], ((cl_uint*) q2)[j]);
+                      error = -1;
+                      goto exit;
+                    }
+                }
+            }
+        }
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+            p[j] = DoubleFromUInt32( genrand_int32(d) );
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_TRUE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal );
+    vlog( "\n" );
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+    return error;
diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
deleted file mode 100644
index 14f4109..0000000
--- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp
+++ /dev/null
@@ -1,580 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <climits>
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global int",
-                        sizeNames[vectorSize],
-                        "* out2, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], out2 + i );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global int* out2, __global double* in, "
-        "__global double* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
-        "       int3 i0 = 0xdeaddead;\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, &i0 );\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "       vstore3( i0, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       double3 d1;\n"
-        "       int3 i0 = 0xdeaddead;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, &i0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               out2[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               out2[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-typedef struct ComputeReferenceInfoD_
-    const double *x;
-    const double *y;
-    double *r;
-    int *i;
-    long double (*f_ffpI)(long double, long double, int *);
-    cl_uint lim;
-    cl_uint count;
-} ComputeReferenceInfoD;
-static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
-    ComputeReferenceInfoD *cri = (ComputeReferenceInfoD *)userInfo;
-    cl_uint lim = cri->lim;
-    cl_uint count = cri->count;
-    cl_uint off = jid * count;
-    const double *x = cri->x + off;
-    const double *y = cri->y + off;
-    double *r = cri->r + off;
-    int *i = cri->i + off;
-    long double (*f)(long double, long double, int *) = cri->f_ffpI;
-    if (off + count > lim) count = lim - off;
-    Force64BitFPUPrecision();
-    for (cl_uint j = 0; j < count; ++j)
-        r[j] = (double)f((long double)x[j], (long double)y[j], i + j);
-    return CL_SUCCESS;
-int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int64_t maxError2 = 0;
-    int ftz = f->ftz || gForceFTZ;
-    double maxErrorVal = 0.0f;
-    double maxErrorVal2 = 0.0f;
-    uint64_t step = getTestStep(sizeof(double), BUFFER_SIZE);
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    cl_uint threadCount = GetThreadCount();
-    Force64BitFPUPrecision();
-    int testingRemquo = !strcmp(f->name, "remquo");
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        double *p = (double *)gIn;
-        double *p2 = (double *)gIn2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-        {
-            p[j] = DoubleFromUInt32(genrand_int32(d));
-            p2[j] = DoubleFromUInt32(genrand_int32(d));
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
-            return error;
-        }
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeof(cl_double) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-        // Calculate the correctly rounded reference result
-        double *s = (double *)gIn;
-        double *s2 = (double *)gIn2;
-        if (threadCount > 1)
-        {
-            ComputeReferenceInfoD cri;
-            cri.x = s;
-            cri.y = s2;
-            cri.r = (double *)gOut_Ref;
-            cri.i = (int *)gOut_Ref2;
-            cri.f_ffpI = f->dfunc.f_ffpI;
-            cri.lim = BUFFER_SIZE / sizeof(double);
-            cri.count = (cri.lim + threadCount - 1) / threadCount;
-            ThreadPool_Do(ReferenceD, threadCount, &cri);
-        }
-        else
-        {
-            double *r = (double *)gOut_Ref;
-            int *r2 = (int *)gOut_Ref2;
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-                r[j] = (double)f->dfunc.f_ffpI(s[j], s2[j], r2 + j);
-        }
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
-            }
-        }
-        if (gSkipCorrectnessTesting) break;
-        // Verify data
-        uint64_t *t = (uint64_t *)gOut_Ref;
-        int32_t *t2 = (int32_t *)gOut_Ref2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint64_t *q = (uint64_t *)gOut[k];
-                int32_t *q2 = (int32_t *)gOut2[k];
-                // Check for exact match to correctly rounded result
-                if (t[j] == q[j] && t2[j] == q2[j]) continue;
-                // Check for paired NaNs
-                if ((t[j] & 0x7fffffffffffffffUL) > 0x7ff0000000000000UL
-                    && (q[j] & 0x7fffffffffffffffUL) > 0x7ff0000000000000UL
-                    && t2[j] == q2[j])
-                    continue;
-                double test = ((double *)q)[j];
-                int correct2 = INT_MIN;
-                long double correct = f->dfunc.f_ffpI(s[j], s2[j], &correct2);
-                float err = Bruteforce_Ulp_Error_Double(test, correct);
-                int64_t iErr;
-                // in case of remquo, we only care about the sign and last
-                // seven bits of integer as per the spec.
-                if (testingRemquo)
-                    iErr = (long long)(q2[j] & 0x0000007f)
-                        - (long long)(correct2 & 0x0000007f);
-                else
-                    iErr = (long long)q2[j] - (long long)correct2;
-                // For remquo, if y = 0, x is infinite, or either is NaN
-                // then the standard either neglects to say what is returned
-                // in iptr or leaves it undefined or implementation defined.
-                int iptrUndefined = fabs(((double *)gIn)[j]) == INFINITY
-                    || ((double *)gIn2)[j] == 0.0 || isnan(((double *)gIn2)[j])
-                    || isnan(((double *)gIn)[j]);
-                if (iptrUndefined) iErr = 0;
-                int fail = !(fabsf(err) <= f->double_ulps && iErr == 0);
-                if (ftz && fail)
-                {
-                    // retry per section
-                    if (IsDoubleResultSubnormal(correct, f->double_ulps))
-                    {
-                        fail = fail && !(test == 0.0f && iErr == 0);
-                        if (!fail) err = 0.0f;
-                    }
-                    // retry per section
-                    if (IsDoubleSubnormal(s[j]))
-                    {
-                        int correct3i, correct4i;
-                        long double correct3 =
-                            f->dfunc.f_ffpI(0.0, s2[j], &correct3i);
-                        long double correct4 =
-                            f->dfunc.f_ffpI(-0.0, s2[j], &correct4i);
-                        float err2 =
-                            Bruteforce_Ulp_Error_Double(test, correct3);
-                        float err3 =
-                            Bruteforce_Ulp_Error_Double(test, correct4);
-                        int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
-                        int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
-                        fail = fail
-                            && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0))
-                                && (!(fabsf(err3) <= f->double_ulps
-                                      && iErr4 == 0)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-                        if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
-                        if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
-                        // retry per section
-                        if (IsDoubleResultSubnormal(correct2, f->double_ulps)
-                            || IsDoubleResultSubnormal(correct3,
-                                                       f->double_ulps))
-                        {
-                            fail = fail
-                                && !(test == 0.0f
-                                     && (iErr3 == 0 || iErr4 == 0));
-                            if (!fail) err = 0.0f;
-                        }
-                        // try with both args as zero
-                        if (IsDoubleSubnormal(s2[j]))
-                        {
-                            int correct7i, correct8i;
-                            correct3 = f->dfunc.f_ffpI(0.0, 0.0, &correct3i);
-                            correct4 = f->dfunc.f_ffpI(-0.0, 0.0, &correct4i);
-                            long double correct7 =
-                                f->dfunc.f_ffpI(0.0, -0.0, &correct7i);
-                            long double correct8 =
-                                f->dfunc.f_ffpI(-0.0, -0.0, &correct8i);
-                            err2 = Bruteforce_Ulp_Error_Double(test, correct3);
-                            err3 = Bruteforce_Ulp_Error_Double(test, correct4);
-                            float err4 =
-                                Bruteforce_Ulp_Error_Double(test, correct7);
-                            float err5 =
-                                Bruteforce_Ulp_Error_Double(test, correct8);
-                            iErr3 = (long long)q2[j] - (long long)correct3i;
-                            iErr4 = (long long)q2[j] - (long long)correct4i;
-                            int64_t iErr7 =
-                                (long long)q2[j] - (long long)correct7i;
-                            int64_t iErr8 =
-                                (long long)q2[j] - (long long)correct8i;
-                            fail = fail
-                                && ((!(fabsf(err2) <= f->double_ulps
-                                       && iErr3 == 0))
-                                    && (!(fabsf(err3) <= f->double_ulps
-                                          && iErr4 == 0))
-                                    && (!(fabsf(err4) <= f->double_ulps
-                                          && iErr7 == 0))
-                                    && (!(fabsf(err5) <= f->double_ulps
-                                          && iErr8 == 0)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            if (fabsf(err4) < fabsf(err)) err = err4;
-                            if (fabsf(err5) < fabsf(err)) err = err5;
-                            if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
-                            if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
-                            if (llabs(iErr7) < llabs(iErr)) iErr = iErr7;
-                            if (llabs(iErr8) < llabs(iErr)) iErr = iErr8;
-                            // retry per section
-                            if (IsDoubleResultSubnormal(correct3,
-                                                        f->double_ulps)
-                                || IsDoubleResultSubnormal(correct4,
-                                                           f->double_ulps)
-                                || IsDoubleResultSubnormal(correct7,
-                                                           f->double_ulps)
-                                || IsDoubleResultSubnormal(correct8,
-                                                           f->double_ulps))
-                            {
-                                fail = fail
-                                    && !(test == 0.0f
-                                         && (iErr3 == 0 || iErr4 == 0
-                                             || iErr7 == 0 || iErr8 == 0));
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                    else if (IsDoubleSubnormal(s2[j]))
-                    {
-                        int correct3i, correct4i;
-                        long double correct3 =
-                            f->dfunc.f_ffpI(s[j], 0.0, &correct3i);
-                        long double correct4 =
-                            f->dfunc.f_ffpI(s[j], -0.0, &correct4i);
-                        float err2 =
-                            Bruteforce_Ulp_Error_Double(test, correct3);
-                        float err3 =
-                            Bruteforce_Ulp_Error_Double(test, correct4);
-                        int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
-                        int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
-                        fail = fail
-                            && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0))
-                                && (!(fabsf(err3) <= f->double_ulps
-                                      && iErr4 == 0)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-                        if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
-                        if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
-                        // retry per section
-                        if (IsDoubleResultSubnormal(correct2, f->double_ulps)
-                            || IsDoubleResultSubnormal(correct3,
-                                                       f->double_ulps))
-                        {
-                            fail = fail
-                                && !(test == 0.0f
-                                     && (iErr3 == 0 || iErr4 == 0));
-                            if (!fail) err = 0.0f;
-                        }
-                    }
-                }
-                if (fabsf(err) > maxError)
-                {
-                    maxError = fabsf(err);
-                    maxErrorVal = s[j];
-                }
-                if (llabs(iErr) > maxError2)
-                {
-                    maxError2 = llabs(iErr);
-                    maxErrorVal2 = s[j];
-                }
-                if (fail)
-                {
-                    vlog_error(
-                        "\nERROR: %sD%s: {%f, %lld} ulp error at {%.13la, "
-                        "%.13la} ({ 0x%16.16llx, 0x%16.16llx}): *{%.13la, "
-                        "%d} ({ 0x%16.16llx, 0x%8.8x}) vs. {%.13la, %d} ({ "
-                        "0x%16.16llx, 0x%8.8x})\n",
-                        f->name, sizeNames[k], err, iErr, ((double *)gIn)[j],
-                        ((double *)gIn2)[j], ((cl_ulong *)gIn)[j],
-                        ((cl_ulong *)gIn2)[j], ((double *)gOut_Ref)[j],
-                        ((int *)gOut_Ref2)[j], ((cl_ulong *)gOut_Ref)[j],
-                        ((cl_uint *)gOut_Ref2)[j], test, q2[j],
-                        ((cl_ulong *)q)[j], ((cl_uint *)q2)[j]);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
-             maxErrorVal2);
-    }
-    vlog("\n");
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
deleted file mode 100644
index 5ef44b6..0000000
--- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp
+++ /dev/null
@@ -1,565 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <climits>
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global int",
-                        sizeNames[vectorSize],
-                        "* out2, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], out2 + i );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global int* out2, __global float* in, "
-        "__global float* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       int3 i0 = 0xdeaddead;\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, &i0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "       vstore3( i0, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       int3 i0 = 0xdeaddead;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, &i0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               out2[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               out2[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-typedef struct ComputeReferenceInfoF_
-    const float *x;
-    const float *y;
-    float *r;
-    int *i;
-    double (*f_ffpI)(double, double, int *);
-    cl_uint lim;
-    cl_uint count;
-} ComputeReferenceInfoF;
-static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
-    ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo;
-    cl_uint lim = cri->lim;
-    cl_uint count = cri->count;
-    cl_uint off = jid * count;
-    const float *x = cri->x + off;
-    const float *y = cri->y + off;
-    float *r = cri->r + off;
-    int *i = cri->i + off;
-    double (*f)(double, double, int *) = cri->f_ffpI;
-    if (off + count > lim) count = lim - off;
-    for (cl_uint j = 0; j < count; ++j)
-        r[j] = (float)f((double)x[j], (double)y[j], i + j);
-    return CL_SUCCESS;
-int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
-    int error;
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    int64_t maxError2 = 0;
-    float maxErrorVal = 0.0f;
-    float maxErrorVal2 = 0.0f;
-    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
-    cl_uint threadCount = GetThreadCount();
-    float float_ulps;
-    if (gIsEmbedded)
-        float_ulps = f->float_embedded_ulps;
-    else
-        float_ulps = f->float_ulps;
-    int testingRemquo = !strcmp(f->name, "remquo");
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        cl_uint *p = (cl_uint *)gIn;
-        cl_uint *p2 = (cl_uint *)gIn2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            p[j] = genrand_int32(d);
-            p2[j] = genrand_int32(d);
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
-            return error;
-        }
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-        // Calculate the correctly rounded reference result
-        float *s = (float *)gIn;
-        float *s2 = (float *)gIn2;
-        if (threadCount > 1)
-        {
-            ComputeReferenceInfoF cri;
-            cri.x = s;
-            cri.y = s2;
-            cri.r = (float *)gOut_Ref;
-            cri.i = (int *)gOut_Ref2;
-            cri.f_ffpI = f->func.f_ffpI;
-            cri.lim = BUFFER_SIZE / sizeof(float);
-            cri.count = (cri.lim + threadCount - 1) / threadCount;
-            ThreadPool_Do(ReferenceF, threadCount, &cri);
-        }
-        else
-        {
-            float *r = (float *)gOut_Ref;
-            int *r2 = (int *)gOut_Ref2;
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                r[j] = (float)f->func.f_ffpI(s[j], s2[j], r2 + j);
-        }
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
-            }
-        }
-        if (gSkipCorrectnessTesting) break;
-        // Verify data
-        uint32_t *t = (uint32_t *)gOut_Ref;
-        int32_t *t2 = (int32_t *)gOut_Ref2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint32_t *q = (uint32_t *)(gOut[k]);
-                int32_t *q2 = (int32_t *)gOut2[k];
-                // Check for exact match to correctly rounded result
-                if (t[j] == q[j] && t2[j] == q2[j]) continue;
-                // Check for paired NaNs
-                if ((t[j] & 0x7fffffff) > 0x7f800000
-                    && (q[j] & 0x7fffffff) > 0x7f800000 && t2[j] == q2[j])
-                    continue;
-                float test = ((float *)q)[j];
-                int correct2 = INT_MIN;
-                double correct = f->func.f_ffpI(s[j], s2[j], &correct2);
-                float err = Ulp_Error(test, correct);
-                int64_t iErr;
-                // in case of remquo, we only care about the sign and last
-                // seven bits of integer as per the spec.
-                if (testingRemquo)
-                    iErr = (long long)(q2[j] & 0x0000007f)
-                        - (long long)(correct2 & 0x0000007f);
-                else
-                    iErr = (long long)q2[j] - (long long)correct2;
-                // For remquo, if y = 0, x is infinite, or either is NaN
-                // then the standard either neglects to say what is returned
-                // in iptr or leaves it undefined or implementation defined.
-                int iptrUndefined = fabs(((float *)gIn)[j]) == INFINITY
-                    || ((float *)gIn2)[j] == 0.0f || isnan(((float *)gIn2)[j])
-                    || isnan(((float *)gIn)[j]);
-                if (iptrUndefined) iErr = 0;
-                int fail = !(fabsf(err) <= float_ulps && iErr == 0);
-                if (ftz && fail)
-                {
-                    // retry per section
-                    if (IsFloatResultSubnormal(correct, float_ulps))
-                    {
-                        fail = fail && !(test == 0.0f && iErr == 0);
-                        if (!fail) err = 0.0f;
-                    }
-                    // retry per section
-                    if (IsFloatSubnormal(s[j]))
-                    {
-                        int correct3i, correct4i;
-                        double correct3 =
-                            f->func.f_ffpI(0.0, s2[j], &correct3i);
-                        double correct4 =
-                            f->func.f_ffpI(-0.0, s2[j], &correct4i);
-                        float err2 = Ulp_Error(test, correct3);
-                        float err3 = Ulp_Error(test, correct4);
-                        int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
-                        int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
-                        fail = fail
-                            && ((!(fabsf(err2) <= float_ulps && iErr3 == 0))
-                                && (!(fabsf(err3) <= float_ulps
-                                      && iErr4 == 0)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-                        if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
-                        if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
-                        // retry per section
-                        if (IsFloatResultSubnormal(correct2, float_ulps)
-                            || IsFloatResultSubnormal(correct3, float_ulps))
-                        {
-                            fail = fail
-                                && !(test == 0.0f
-                                     && (iErr3 == 0 || iErr4 == 0));
-                            if (!fail) err = 0.0f;
-                        }
-                        // try with both args as zero
-                        if (IsFloatSubnormal(s2[j]))
-                        {
-                            int correct7i, correct8i;
-                            correct3 = f->func.f_ffpI(0.0, 0.0, &correct3i);
-                            correct4 = f->func.f_ffpI(-0.0, 0.0, &correct4i);
-                            double correct7 =
-                                f->func.f_ffpI(0.0, -0.0, &correct7i);
-                            double correct8 =
-                                f->func.f_ffpI(-0.0, -0.0, &correct8i);
-                            err2 = Ulp_Error(test, correct3);
-                            err3 = Ulp_Error(test, correct4);
-                            float err4 = Ulp_Error(test, correct7);
-                            float err5 = Ulp_Error(test, correct8);
-                            iErr3 = (long long)q2[j] - (long long)correct3i;
-                            iErr4 = (long long)q2[j] - (long long)correct4i;
-                            int64_t iErr7 =
-                                (long long)q2[j] - (long long)correct7i;
-                            int64_t iErr8 =
-                                (long long)q2[j] - (long long)correct8i;
-                            fail = fail
-                                && ((!(fabsf(err2) <= float_ulps && iErr3 == 0))
-                                    && (!(fabsf(err3) <= float_ulps
-                                          && iErr4 == 0))
-                                    && (!(fabsf(err4) <= float_ulps
-                                          && iErr7 == 0))
-                                    && (!(fabsf(err5) <= float_ulps
-                                          && iErr8 == 0)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            if (fabsf(err4) < fabsf(err)) err = err4;
-                            if (fabsf(err5) < fabsf(err)) err = err5;
-                            if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
-                            if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
-                            if (llabs(iErr7) < llabs(iErr)) iErr = iErr7;
-                            if (llabs(iErr8) < llabs(iErr)) iErr = iErr8;
-                            // retry per section
-                            if (IsFloatResultSubnormal(correct3, float_ulps)
-                                || IsFloatResultSubnormal(correct4, float_ulps)
-                                || IsFloatResultSubnormal(correct7, float_ulps)
-                                || IsFloatResultSubnormal(correct8, float_ulps))
-                            {
-                                fail = fail
-                                    && !(test == 0.0f
-                                         && (iErr3 == 0 || iErr4 == 0
-                                             || iErr7 == 0 || iErr8 == 0));
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                    else if (IsFloatSubnormal(s2[j]))
-                    {
-                        int correct3i, correct4i;
-                        double correct3 = f->func.f_ffpI(s[j], 0.0, &correct3i);
-                        double correct4 =
-                            f->func.f_ffpI(s[j], -0.0, &correct4i);
-                        float err2 = Ulp_Error(test, correct3);
-                        float err3 = Ulp_Error(test, correct4);
-                        int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
-                        int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
-                        fail = fail
-                            && ((!(fabsf(err2) <= float_ulps && iErr3 == 0))
-                                && (!(fabsf(err3) <= float_ulps
-                                      && iErr4 == 0)));
-                        if (fabsf(err2) < fabsf(err)) err = err2;
-                        if (fabsf(err3) < fabsf(err)) err = err3;
-                        if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
-                        if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
-                        // retry per section
-                        if (IsFloatResultSubnormal(correct2, float_ulps)
-                            || IsFloatResultSubnormal(correct3, float_ulps))
-                        {
-                            fail = fail
-                                && !(test == 0.0f
-                                     && (iErr3 == 0 || iErr4 == 0));
-                            if (!fail) err = 0.0f;
-                        }
-                    }
-                }
-                if (fabsf(err) > maxError)
-                {
-                    maxError = fabsf(err);
-                    maxErrorVal = s[j];
-                }
-                if (llabs(iErr) > maxError2)
-                {
-                    maxError2 = llabs(iErr);
-                    maxErrorVal2 = s[j];
-                }
-                if (fail)
-                {
-                    vlog_error(
-                        "\nERROR: %s%s: {%f, %lld} ulp error at {%a, %a} "
-                        "({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, "
-                        "0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n",
-                        f->name, sizeNames[k], err, iErr, ((float *)gIn)[j],
-                        ((float *)gIn2)[j], ((cl_uint *)gIn)[j],
-                        ((cl_uint *)gIn2)[j], ((float *)gOut_Ref)[j],
-                        ((int *)gOut_Ref2)[j], ((cl_uint *)gOut_Ref)[j],
-                        ((cl_uint *)gOut_Ref2)[j], test, q2[j],
-                        ((cl_uint *)&test)[0], ((cl_uint *)q2)[j]);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
-             maxErrorVal2);
-    }
-    vlog("\n");
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp
deleted file mode 100644
index 3edbb48..0000000
--- a/test_conformance/math_brute_force/function_list.cpp
+++ /dev/null
@@ -1,391 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "reference_math.h"
-#include "test_functions.h"
-#define FTZ_ON 1
-#define FTZ_OFF 0
-#define EXACT 0.0f
-#define RELAXED_ON 1
-#define RELAXED_OFF 0
-#define STRINGIFY(_s) #_s
-// Only use ulps information in spir test
-#define ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                       \
-    {                                                                          \
-        STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL },      \
-            _ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,             \
-            RELAXED_OFF, _type                                                 \
-    }
-#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type,     \
-                  _relaxed_embedded_ulp)                                       \
-    {                                                                          \
-        STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL },      \
-            _ulp, _ulp, _embedded_ulp, _relaxed_ulp, _relaxed_embedded_ulp,    \
-            _rmode, RELAXED_ON, _type                                          \
-    }
-#define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                  \
-    {                                                                          \
-        "half_" STRINGIFY(_name), "half_" STRINGIFY(_name), { NULL },          \
-            { NULL }, { NULL }, _ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, \
-            _rmode, RELAXED_OFF, _type                                         \
-    }
-#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type)   \
-    {                                                                          \
-        STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \
-            _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type      \
-    }
-#define unaryF NULL
-#define i_unaryF NULL
-#define unaryF_u NULL
-#define macro_unaryF NULL
-#define binaryF NULL
-#define binaryOperatorF NULL
-#define binaryF_i NULL
-#define macro_binaryF NULL
-#define ternaryF NULL
-#define unaryF_two_results NULL
-#define unaryF_two_results_i NULL
-#define binaryF_two_results_i NULL
-#define mad_function NULL
-#define reference_sqrt NULL
-#define reference_sqrtl NULL
-#define reference_divide NULL
-#define reference_dividel NULL
-#define reference_relaxed_divide NULL
-#define ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                       \
-    {                                                                          \
-        STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name },      \
-            { (void*)reference_##_name##l }, { (void*)reference_##_name },     \
-            _ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,             \
-            RELAXED_OFF, _type                                                 \
-    }
-#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type,     \
-                  _relaxed_embedded_ulp)                                       \
-    {                                                                          \
-        STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name },      \
-            { (void*)reference_##_name##l },                                   \
-            { (void*)reference_##relaxed_##_name }, _ulp, _ulp, _embedded_ulp, \
-            _relaxed_ulp, _relaxed_embedded_ulp, _rmode, RELAXED_ON, _type     \
-    }
-#define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                  \
-    {                                                                          \
-        "half_" STRINGIFY(_name), "half_" STRINGIFY(_name),                    \
-            { (void*)reference_##_name }, { NULL }, { NULL }, _ulp, _ulp,      \
-            _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type      \
-    }
-#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type)   \
-    {                                                                          \
-        STRINGIFY(_name), _operator, { (void*)reference_##_name },             \
-            { (void*)reference_##_name##l }, { NULL }, _ulp, _ulp,             \
-            _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type      \
-    }
-static constexpr vtbl _unary = {
-    "unary",
-    TestFunc_Float_Float,
-    TestFunc_Double_Double,
-static constexpr vtbl _i_unary = {
-    "i_unary",
-    TestFunc_Int_Float,
-    TestFunc_Int_Double,
-static constexpr vtbl _unary_u = {
-    "unary_u",
-    TestFunc_Float_UInt,
-    TestFunc_Double_ULong,
-static constexpr vtbl _macro_unary = {
-    "macro_unary",
-    TestMacro_Int_Float,
-    TestMacro_Int_Double,
-static constexpr vtbl _binary = {
-    "binary",
-    TestFunc_Float_Float_Float,
-    TestFunc_Double_Double_Double,
-static constexpr vtbl _binary_operator = {
-    "binaryOperator",
-    TestFunc_Float_Float_Float_Operator,
-    TestFunc_Double_Double_Double_Operator,
-static constexpr vtbl _binary_i = {
-    "binary_i",
-    TestFunc_Float_Float_Int,
-    TestFunc_Double_Double_Int,
-static constexpr vtbl _macro_binary = {
-    "macro_binary",
-    TestMacro_Int_Float_Float,
-    TestMacro_Int_Double_Double,
-static constexpr vtbl _ternary = {
-    "ternary",
-    TestFunc_Float_Float_Float_Float,
-    TestFunc_Double_Double_Double_Double,
-static constexpr vtbl _unary_two_results = {
-    "unary_two_results",
-    TestFunc_Float2_Float,
-    TestFunc_Double2_Double,
-static constexpr vtbl _unary_two_results_i = {
-    "unary_two_results_i",
-    TestFunc_FloatI_Float,
-    TestFunc_DoubleI_Double,
-static constexpr vtbl _binary_two_results_i = {
-    "binary_two_results_i",
-    TestFunc_FloatI_Float_Float,
-    TestFunc_DoubleI_Double_Double,
-static constexpr vtbl _mad_tbl = {
-    "ternary",
-    TestFunc_mad_Float,
-    TestFunc_mad_Double,
-#define unaryF &_unary
-#define i_unaryF &_i_unary
-#define unaryF_u &_unary_u
-#define macro_unaryF &_macro_unary
-#define binaryF &_binary
-#define binaryOperatorF &_binary_operator
-#define binaryF_i &_binary_i
-#define macro_binaryF &_macro_binary
-#define ternaryF &_ternary
-#define unaryF_two_results &_unary_two_results
-#define unaryF_two_results_i &_unary_two_results_i
-#define binaryF_two_results_i &_binary_two_results_i
-#define mad_function &_mad_tbl
-const Func functionList[] = {
-    ENTRY_EXT(acos, 4.0f, 4.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
-    ENTRY(acosh, 4.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(acospi, 5.0f, 5.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(asin, 4.0f, 4.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
-    ENTRY(asinh, 4.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(asinpi, 5.0f, 5.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(atan, 5.0f, 5.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
-    ENTRY(atanh, 5.0f, 5.0f, FTZ_OFF, unaryF),
-    ENTRY(atanpi, 5.0f, 5.0f, FTZ_OFF, unaryF),
-    ENTRY(atan2, 6.0f, 6.0f, FTZ_OFF, binaryF),
-    ENTRY(atan2pi, 6.0f, 6.0f, FTZ_OFF, binaryF),
-    ENTRY(cbrt, 2.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(ceil, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(copysign, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY_EXT(cos, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
-              0.00048828125f), // relaxed ulp 2^-11
-    ENTRY(cosh, 4.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(cospi, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
-              0.00048828125f), // relaxed ulp 2^-11
-    //                                  ENTRY( erfc,                  16.0f,
-    //                                  16.0f,         FTZ_OFF,     unaryF),
-    //                                  //disabled for 1.0 due to lack of
-    //                                  reference implementation ENTRY( erf,
-    //                                  16.0f,         16.0f,         FTZ_OFF,
-    //                                  unaryF), //disabled for 1.0 due to lack
-    //                                  of reference implementation
-    ENTRY_EXT(exp, 3.0f, 4.0f, 3.0f, FTZ_OFF, unaryF,
-              4.0f), // relaxed error is actually overwritten in unary.c as it
-                     // is 3+floor(fabs(2*x))
-    ENTRY_EXT(exp2, 3.0f, 4.0f, 3.0f, FTZ_OFF, unaryF,
-              4.0f), // relaxed error is actually overwritten in unary.c as it
-                     // is 3+floor(fabs(2*x))
-    ENTRY_EXT(exp10, 3.0f, 4.0f, 8192.0f, FTZ_OFF, unaryF,
-              8192.0f), // relaxed error is actually overwritten in unary.c as
-                        // it is 3+floor(fabs(2*x)) in derived mode,
-    // in non-derived mode it uses the ulp error for half_exp10.
-    ENTRY(expm1, 3.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(fabs, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(fdim, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(floor, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(fma, 0.0f, 0.0f, FTZ_OFF, ternaryF),
-    ENTRY(fmax, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(fmin, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(fmod, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(fract, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
-    ENTRY(frexp, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results_i),
-    ENTRY(hypot, 4.0f, 4.0f, FTZ_OFF, binaryF),
-    ENTRY(ilogb, 0.0f, 0.0f, FTZ_OFF, i_unaryF),
-    ENTRY(isequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isfinite, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isgreaterequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isinf, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isless, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(islessequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(islessgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isnan, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isnormal, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isnotequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isunordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(ldexp, 0.0f, 0.0f, FTZ_OFF, binaryF_i),
-    ENTRY(lgamma, INFINITY, INFINITY, FTZ_OFF, unaryF),
-    ENTRY(lgamma_r, INFINITY, INFINITY, FTZ_OFF, unaryF_two_results_i),
-    ENTRY_EXT(log, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
-              4.76837158203125e-7f), // relaxed ulp 2^-21
-    ENTRY_EXT(log2, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
-              4.76837158203125e-7f), // relaxed ulp 2^-21
-    ENTRY_EXT(log10, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
-              4.76837158203125e-7f), // relaxed ulp 2^-21
-    ENTRY(log1p, 2.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(logb, 0.0f, 0.0f, FTZ_OFF, unaryF),
-              INFINITY), // in fast-relaxed-math mode it has to be either
-                         // exactly rounded fma or exactly rounded a*b+c
-    ENTRY(maxmag, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(minmag, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(modf, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
-    ENTRY(nan, 0.0f, 0.0f, FTZ_OFF, unaryF_u),
-    ENTRY(nextafter, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY_EXT(pow, 16.0f, 16.0f, 8192.0f, FTZ_OFF, binaryF,
-              8192.0f), // in derived mode the ulp error is calculated as
-                        // exp2(y*log2(x)) and in non-derived it is the same as
-                        // half_pow
-    ENTRY(pown, 16.0f, 16.0f, FTZ_OFF, binaryF_i),
-    ENTRY(powr, 16.0f, 16.0f, FTZ_OFF, binaryF),
-    //                                  ENTRY( reciprocal,            1.0f,
-    //                                  1.0f,         FTZ_OFF,     unaryF),
-    ENTRY(remainder, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(remquo, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
-    ENTRY(rint, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(rootn, 16.0f, 16.0f, FTZ_OFF, binaryF_i),
-    ENTRY(round, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(rsqrt, 2.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(signbit, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY_EXT(sin, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
-              0.00048828125f), // relaxed ulp 2^-11
-    ENTRY_EXT(sincos, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF_two_results,
-              0.00048828125f), // relaxed ulp 2^-11
-    ENTRY(sinh, 4.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(sinpi, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
-              0.00048828125f), // relaxed ulp 2^-11
-    { "sqrt",
-      "sqrt",
-      { (void*)reference_sqrt },
-      { (void*)reference_sqrtl },
-      { NULL },
-      3.0f,
-      0.0f,
-      4.0f,
-      INFINITY,
-      INFINITY,
-      FTZ_OFF,
-      unaryF },
-    { "sqrt_cr",
-      "sqrt",
-      { (void*)reference_sqrt },
-      { (void*)reference_sqrtl },
-      { NULL },
-      0.0f,
-      0.0f,
-      0.0f,
-      INFINITY,
-      INFINITY,
-      FTZ_OFF,
-      unaryF },
-        tan, 5.0f, 5.0f, 8192.0f, FTZ_OFF, unaryF,
-        8192.0f), // in derived mode it the ulp error is calculated as sin/cos
-                  // and in non-derived mode it is the same as half_tan.
-    ENTRY(tanh, 5.0f, 5.0f, FTZ_OFF, unaryF),
-    ENTRY(tanpi, 6.0f, 6.0f, FTZ_OFF, unaryF),
-    //                                    ENTRY( tgamma,                 16.0f,
-    //                                    16.0f,         FTZ_OFF,     unaryF),
-    //                                    // Commented this out until we can be
-    //                                    sure this requirement is realistic
-    ENTRY(trunc, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    HALF_ENTRY(cos, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(divide, 8192.0f, 8192.0f, FTZ_ON, binaryF),
-    HALF_ENTRY(exp, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(exp2, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(exp10, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(log, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(log2, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(log10, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(powr, 8192.0f, 8192.0f, FTZ_ON, binaryF),
-    HALF_ENTRY(recip, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(rsqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(sin, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(sqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(tan, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    // basic operations
-    OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
-    OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
-    { "divide",
-      "/",
-      { (void*)reference_divide },
-      { (void*)reference_dividel },
-      { (void*)reference_relaxed_divide },
-      2.5f,
-      0.0f,
-      3.0f,
-      2.5f,
-      INFINITY,
-      FTZ_OFF,
-      RELAXED_ON,
-      binaryOperatorF },
-    { "divide_cr",
-      "/",
-      { (void*)reference_divide },
-      { (void*)reference_dividel },
-      { (void*)reference_relaxed_divide },
-      0.0f,
-      0.0f,
-      0.0f,
-      0.f,
-      INFINITY,
-      FTZ_OFF,
-      binaryOperatorF },
-    OPERATOR_ENTRY(multiply, "*", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
-    OPERATOR_ENTRY(assignment, "", 0.0f, 0.0f, FTZ_OFF,
-                   unaryF), // A simple copy operation
-    OPERATOR_ENTRY(not, "!", 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-const size_t functionListCount = sizeof(functionList) / sizeof(functionList[0]);
diff --git a/test_conformance/math_brute_force/function_list.h b/test_conformance/math_brute_force/function_list.h
deleted file mode 100644
index 38f739c..0000000
--- a/test_conformance/math_brute_force/function_list.h
+++ /dev/null
@@ -1,98 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "harness/compat.h"
-#ifndef WIN32
-#include <unistd.h>
-#if defined(__APPLE__)
-#include <OpenCL/opencl.h>
-#include <CL/cl.h>
-#include "harness/mt19937.h"
-typedef union fptr {
-    void *p;
-    double (*f_f)(double);
-    double (*f_u)(cl_uint);
-    int (*i_f)(double);
-    int (*i_f_f)(float);
-    float (*f_ff_f)(float, float);
-    double (*f_ff)(double, double);
-    int (*i_ff)(double, double);
-    double (*f_fi)(double, int);
-    double (*f_fpf)(double, double *);
-    double (*f_fpI)(double, int *);
-    double (*f_ffpI)(double, double, int *);
-    double (*f_fff)(double, double, double);
-    float (*f_fma)(float, float, float, int);
-} fptr;
-typedef union dptr {
-    void *p;
-    long double (*f_f)(long double);
-    long double (*f_u)(cl_ulong);
-    int (*i_f)(long double);
-    long double (*f_ff)(long double, long double);
-    int (*i_ff)(long double, long double);
-    long double (*f_fi)(long double, int);
-    long double (*f_fpf)(long double, long double *);
-    long double (*f_fpI)(long double, int *);
-    long double (*f_ffpI)(long double, long double, int *);
-    long double (*f_fff)(long double, long double, long double);
-} dptr;
-struct Func;
-typedef struct vtbl
-    const char *type_name;
-    int (*TestFunc)(const struct Func *, MTdata, bool);
-    int (*DoubleTestFunc)(
-        const struct Func *, MTdata,
-        bool); // may be NULL if function is single precision only
-} vtbl;
-typedef struct Func
-    const char *name; // common name, to be used as an argument in the shell
-    const char *nameInCode; // name as it appears in the __kernel, usually the
-                            // same as name, but different for multiplication
-    fptr func;
-    dptr dfunc;
-    fptr rfunc;
-    float float_ulps;
-    float double_ulps;
-    float float_embedded_ulps;
-    float relaxed_error;
-    float relaxed_embedded_error;
-    int ftz;
-    int relaxed;
-    const vtbl *vtbl_ptr;
-} Func;
-extern const Func functionList[];
-extern const size_t functionListCount;
diff --git a/test_conformance/math_brute_force/i_unary.cpp b/test_conformance/math_brute_force/i_unary.cpp
new file mode 100644
index 0000000..75b9424
--- /dev/null
+++ b/test_conformance/math_brute_force/i_unary.cpp
@@ -0,0 +1,627 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "Utility.h"
+#include <string.h>
+#include "FunctionList.h"
+int TestFunc_Int_Float(const Func *f, MTdata);
+int TestFunc_Int_Double(const Func *f, MTdata);
+extern const vtbl _i_unary = { "i_unary", TestFunc_Int_Float,
+                               TestFunc_Int_Double };
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+    const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i] );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global float* in)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       float3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       int3 i0 = ", name, "( f0 );\n"
+                            "       vstore3( i0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       float3 f0;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       int3 i0 = ", name, "( f0 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = i0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = i0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p);
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i] );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global double* in)\n"
+                        "{\n"
+                        "   size_t i = get_global_id(0);\n"
+                        "   if( i + 1 < get_global_size(0) )\n"
+                        "   {\n"
+                        "       double3 f0 = vload3( 0, in + 3 * i );\n"
+                        "       int3 i0 = ", name, "( f0 );\n"
+                        "       vstore3( i0, 0, out + 3*i );\n"
+                        "   }\n"
+                        "   else\n"
+                        "   {\n"
+                        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                        "       double3 f0;\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 1:\n"
+                        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
+                        "               break;\n"
+                        "           case 0:\n"
+                        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+                        "               break;\n"
+                        "       }\n"
+                        "       int3 i0 = ", name, "( f0 );\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 0:\n"
+                        "               out[3*i+1] = i0.y; \n"
+                        "               // fall through\n"
+                        "           case 1:\n"
+                        "               out[3*i] = i0.x; \n"
+                        "               break;\n"
+                        "       }\n"
+                        "   }\n"
+                        "}\n"
+                    };
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p);
+typedef struct BuildKernelInfo
+    cl_uint     offset;            // the first vector size to build
+    cl_kernel   *kernels;
+    cl_program  *programs;
+    const char  *nameInCode;
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i );
+int TestFunc_Int_Float(const Func *f, MTdata d)
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    int ftz = f->ftz || 0 == (gFloatCapabilities & CL_FP_DENORM) || gForceFTZ;
+    size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( float );
+    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    // This test is not using ThreadPool so we need to disable FTZ here
+    // for reference computations
+    FPU_mode_type oldMode;
+    DisableFTZ(&oldMode);
+    Force64BitFPUPrecision();
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+        return error;
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        if( gWimpyMode )
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = (uint32_t) i + j * scale;
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = (uint32_t) i + j;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+        //Calculate the correctly rounded reference result
+        int *r = (int *)gOut_Ref;
+        float *s = (float *)gIn;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            r[j] = f->func.i_f( s[j] );
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+        if( gSkipCorrectnessTesting )
+            break;
+        //Verify data
+        uint32_t *t = (uint32_t *)gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = (uint32_t *)(gOut[k]);
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] )
+                {
+                    if( ftz && IsFloatSubnormal(s[j]))
+                    {
+                        unsigned int correct0 = f->func.i_f( 0.0 );
+                        unsigned int correct1 = f->func.i_f( -0.0 );
+                        if( q[j] == correct0 || q[j] == correct1 )
+                            continue;
+                    }
+                    uint32_t err = t[j] - q[j];
+                    if( q[j] > t[j] )
+                        err = q[j] - t[j];
+                    vlog_error( "\nERROR: %s%s: %d ulp error at %a (0x%8.8x): *%d vs. %d\n", f->name, sizeNames[k], err, ((float*) gIn)[j], ((cl_uint*) gIn)[j], t[j], q[j] );
+                  error = -1;
+                  goto exit;
+                }
+            }
+        }
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            p[j] = genrand_int32(d);
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+    vlog( "\n" );
+    RestoreFPState(&oldMode);
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+    return error;
+int TestFunc_Int_Double(const Func *f, MTdata d)
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    int ftz = f->ftz || gForceFTZ;
+    size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( cl_double );
+    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1);
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    // This test is not using ThreadPool so we need to disable FTZ here
+    // for reference computations
+    FPU_mode_type oldMode;
+    DisableFTZ(&oldMode);
+    Force64BitFPUPrecision();
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_DoubleFn,
+                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                &build_info ) ))
+    {
+        return error;
+    }
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        if( gWimpyMode )
+        {
+            for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+                p[j] = DoubleFromUInt32( (uint32_t) i + j * scale );
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+                p[j] = DoubleFromUInt32( (uint32_t) i + j );
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+        //Calculate the correctly rounded reference result
+        int *r = (int *)gOut_Ref;
+        double *s = (double *)gIn;
+        for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+            r[j] = f->dfunc.i_f( s[j] );
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+        if( gSkipCorrectnessTesting )
+            break;
+        //Verify data
+        uint32_t *t = (uint32_t *)gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = (uint32_t *)(gOut[k]);
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] )
+                {
+                    if( ftz && IsDoubleSubnormal(s[j]))
+                    {
+                        unsigned int correct0 = f->dfunc.i_f( 0.0 );
+                        unsigned int correct1 = f->dfunc.i_f( -0.0 );
+                        if( q[j] == correct0 || q[j] == correct1 )
+                            continue;
+                    }
+                    uint32_t err = t[j] - q[j];
+                    if( q[j] > t[j] )
+                        err = q[j] - t[j];
+                    vlog_error( "\nERROR: %sD%s: %d ulp error at %.13la: *%d vs. %d\n", f->name, sizeNames[k], err, ((double*) gIn)[j], t[j], q[j] );
+                  error = -1;
+                  goto exit;
+                }
+            }
+        }
+        if( 0 == (i & 0x0fffffff) )
+        {
+            if (gVerboseBruteForce)
+            {
+                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+            } else
+            {
+               vlog("." );
+            }
+           fflush(stdout);
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+            p[j] = DoubleFromUInt32( genrand_int32(d) );
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+    vlog( "\n" );
+    RestoreFPState(&oldMode);
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+    return error;
diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp
deleted file mode 100644
index 4383fa8..0000000
--- a/test_conformance/math_brute_force/i_unary_double.cpp
+++ /dev/null
@@ -1,302 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global int",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global int* out, __global double* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 i0 = ",
-        name,
-        "( f0 );\n"
-        "       vstore3( i0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       int3 i0 = ",
-        name,
-        "( f0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    int ftz = f->ftz || gForceFTZ;
-    uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE);
-    int scale =
-        (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(cl_double)) + 1);
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    // This test is not using ThreadPool so we need to disable FTZ here
-    // for reference computations
-    FPU_mode_type oldMode;
-    DisableFTZ(&oldMode);
-    Force64BitFPUPrecision();
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        double *p = (double *)gIn;
-        if (gWimpyMode)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-                p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-                p[j] = DoubleFromUInt32((uint32_t)i + j);
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-        // Calculate the correctly rounded reference result
-        int *r = (int *)gOut_Ref;
-        double *s = (double *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-            r[j] = f->dfunc.i_f(s[j]);
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-        if (gSkipCorrectnessTesting) break;
-        // Verify data
-        uint32_t *t = (uint32_t *)gOut_Ref;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint32_t *q = (uint32_t *)(gOut[k]);
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j])
-                {
-                    if (ftz && IsDoubleSubnormal(s[j]))
-                    {
-                        unsigned int correct0 = f->dfunc.i_f(0.0);
-                        unsigned int correct1 = f->dfunc.i_f(-0.0);
-                        if (q[j] == correct0 || q[j] == correct1) continue;
-                    }
-                    uint32_t err = t[j] - q[j];
-                    if (q[j] > t[j]) err = q[j] - t[j];
-                    vlog_error(
-                        "\nERROR: %sD%s: %d ulp error at %.13la: *%d vs. %d\n",
-                        f->name, sizeNames[k], err, ((double *)gIn)[j], t[j],
-                        q[j]);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-    }
-    vlog("\n");
-    RestoreFPState(&oldMode);
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp
deleted file mode 100644
index c803aa3..0000000
--- a/test_conformance/math_brute_force/i_unary_float.cpp
+++ /dev/null
@@ -1,298 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global int",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in)\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global int* out, __global float* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 i0 = ",
-        name,
-        "( f0 );\n"
-        "       vstore3( i0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       int3 i0 = ",
-        name,
-        "( f0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
-    int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(float)) + 1);
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    // This test is not using ThreadPool so we need to disable FTZ here
-    // for reference computations
-    FPU_mode_type oldMode;
-    DisableFTZ(&oldMode);
-    Force64BitFPUPrecision();
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        cl_uint *p = (cl_uint *)gIn;
-        if (gWimpyMode)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                p[j] = (cl_uint)i + j * scale;
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                p[j] = (uint32_t)i + j;
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-        // Calculate the correctly rounded reference result
-        int *r = (int *)gOut_Ref;
-        float *s = (float *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            r[j] = f->func.i_f(s[j]);
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-        if (gSkipCorrectnessTesting) break;
-        // Verify data
-        uint32_t *t = (uint32_t *)gOut_Ref;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint32_t *q = (uint32_t *)(gOut[k]);
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j])
-                {
-                    if (ftz && IsFloatSubnormal(s[j]))
-                    {
-                        unsigned int correct0 = f->func.i_f(0.0);
-                        unsigned int correct1 = f->func.i_f(-0.0);
-                        if (q[j] == correct0 || q[j] == correct1) continue;
-                    }
-                    uint32_t err = t[j] - q[j];
-                    if (q[j] > t[j]) err = q[j] - t[j];
-                    vlog_error("\nERROR: %s%s: %d ulp error at %a (0x%8.8x): "
-                               "*%d vs. %d\n",
-                               f->name, sizeNames[k], err, ((float *)gIn)[j],
-                               ((cl_uint *)gIn)[j], t[j], q[j]);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-    }
-    vlog("\n");
-    RestoreFPState(&oldMode);
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/macro_binary.cpp b/test_conformance/math_brute_force/macro_binary.cpp
new file mode 100644
index 0000000..0670990
--- /dev/null
+++ b/test_conformance/math_brute_force/macro_binary.cpp
@@ -0,0 +1,1234 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "Utility.h"
+#include <string.h>
+#include "FunctionList.h"
+int TestMacro_Int_Float_Float(const Func *f, MTdata);
+int TestMacro_Int_Double_Double(const Func *f, MTdata);
+extern const vtbl _macro_binary = { "macro_binary", TestMacro_Int_Float_Float,
+                                    TestMacro_Int_Double_Double };
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+    const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2 )\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ", name, "( in1[i], in2[i] );\n"
+        "}\n"
+    };
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global float* in, __global float* in2)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       float3 f0 = vload3( 0, in + 3 * i );\n"
+        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
+        "       int3 i0 = ", name, "( f0, f1 );\n"
+        "       vstore3( i0, 0, out + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       float3 f0, f1;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       int3 i0 = ", name, "( f0, f1 );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = i0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = i0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);   }
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel", sizeNames[vectorSize], "( __global long", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ", name, "( in1[i], in2[i] );\n"
+        "}\n"
+    };
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel", sizeNames[vectorSize], "( __global long* out, __global double* in, __global double* in2)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       double3 f0 = vload3( 0, in + 3 * i );\n"
+        "       double3 f1 = vload3( 0, in2 + 3 * i );\n"
+        "       long3 l0 = ", name, "( f0, f1 );\n"
+        "       vstore3( l0, 0, out + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       double3 f0, f1;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
+        "               f1 = (double3)( in2[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               f1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       long3 l0 = ", name, "( f0, f1 );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = l0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = l0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+typedef struct BuildKernelInfo
+    cl_uint     offset;            // the first vector size to build
+    cl_uint     kernel_count;
+    cl_kernel   **kernels;
+    cl_program  *programs;
+    const char  *nameInCode;
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i );
+// A table of more difficult cases to get right
+static const float specialValuesFloat[] = {
+    -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),  MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f,  -4.0f, -3.5f,
+    -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),  MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
+    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f,
+    +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
+    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
+    +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
+    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
+    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
+    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
+static const size_t specialValuesFloatCount = sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]);
+// Thread specific data for a worker thread.
+// One instance exists per worker thread; the test driver allocates the array
+// and releases every member on exit.
+typedef struct ThreadInfo
+{
+    cl_mem      inBuf;                              // first input sub-buffer for the thread (slice of gInBuffer)
+    cl_mem      inBuf2;                             // second input sub-buffer for the thread (slice of gInBuffer2)
+    cl_mem      outBuf[ VECTOR_SIZE_COUNT ];        // output sub-buffers for the thread, one per vector size
+    MTdata      d;                                  // per thread random number generator state
+    cl_command_queue tQueue;                        // per thread command queue to improve performance
+}ThreadInfo;
+// Parameters shared (read-only) by every worker job of one test run.
+// Filled in by the test driver and handed to the worker function through ThreadPool_Do.
+typedef struct TestInfo
+{
+    size_t      subBufferSize;                      // Size of the sub-buffer in elements
+    const Func  *f;                                 // A pointer to the function info
+    cl_program  programs[ VECTOR_SIZE_COUNT ];      // programs for various vector sizes
+    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
+    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
+    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
+    cl_uint     step;                               // step between each chunk and the next.
+    cl_uint     scale;                              // stride between individual test values
+    int         ftz;                                // non-zero if running in flush to zero mode
+}TestInfo;
+// Worker job for the float test; run through ThreadPool_Do with a TestInfo* as the last argument.
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );
+// Host-side driver for the float variant of a two-input test whose kernel
+// produces an int result.
+//
+// f - description of the function under test; name, nameInCode and ftz are
+//     read here, the reference implementation is used by TestFloat.
+// d - random number generator used to seed the per-thread generators and to
+//     fill the timing inputs.
+//
+// Steps:
+//   1. size the work from the thread count (and the wimpy-mode settings),
+//   2. create per-thread sub-buffers, command queues and RNG state,
+//   3. build one kernel per vector size per thread,
+//   4. run TestFloat over every job through ThreadPool_Do,
+//   5. optionally time each vector size on gQueue,
+//   6. release everything that was created.
+//
+// Returns 0 when every step succeeded, otherwise the first error reported.
+// All failure paths leave through the exit label so that whatever has been
+// created so far is released.
+int TestMacro_Int_Float_Float(const Func *f, MTdata d)
+{
+    TestInfo    test_info;
+    cl_int      error = CL_SUCCESS;
+    size_t      i, j;
+
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode)
+    {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
+    }
+
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    // Zeroed so that the release code at exit can run on a partially initialized array.
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        // Each thread gets its own equally sized slice of the global buffers.
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) };
+
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        // Check the buffer that was just created (inBuf2), not inBuf again.
+        if( error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+
+    // Run the kernels
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
+
+        if( error )
+            goto exit;
+
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+        {
+            p[j] = genrand_int32(d);
+            p2[j] = genrand_int32(d);
+        }
+
+        // On failure leave through exit (not a bare return) so nothing is leaked.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
+
+            // Timing runs on the whole buffers, using the kernel built for thread 0.
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report the best run unless average times were requested.
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+
+    vlog( "\n" );
+
+exit:
+    // Release everything created above; members never created are still zero
+    // from the memset calls.
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata(test_info.tinfo[i].d);
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+
+        free( test_info.tinfo );
+    }
+
+    return error;
+}
+// Worker job for the float test: checks one chunk of input pairs on one worker thread.
+//
+// job_id    - index of the chunk. The first jobs are filled with pairs taken from
+//             specialValuesFloat (every x/y combination); whatever is left of the
+//             buffer, and all later jobs, is filled with random bit patterns.
+// thread_id - selects this worker's ThreadInfo, its kernels, and its slice of
+//             gIn / gIn2 / gOut_Ref.
+// data      - the TestInfo shared by all jobs (only read here).
+//
+// Returns 0 on success, an OpenCL error code if an API call fails, or -1 when a
+// device result does not match the reference.
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data  )
+{
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_float );
+    cl_uint     base = job_id * (cl_uint) job->step;     // only used for progress reporting below
+    ThreadInfo  *tinfo = job->tinfo + thread_id;
+    fptr        func = job->f->func;
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    const char  *name = job->f->name;
+    cl_int      *t,*r;
+    cl_float    *s,*s2;
+
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_int  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+
+    //Init input array
+    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
+    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+
+    // The special values are tested as the full cross product x * y, which may
+    // span several jobs; indx is the last job that still holds special values.
+    int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+
+    if( job_id <= (cl_uint)indx )
+    { // test edge cases
+        float *fp = (float *)p;
+        float *fp2 = (float *)p2;
+        uint32_t x, y;
+
+        // Resume the cross product where the previous job stopped.
+        x = (job_id * buffer_elements) % specialValuesFloatCount;
+        y = (job_id * buffer_elements) / specialValuesFloatCount;
+
+        for( ; j < buffer_elements; j++ )
+        {
+            fp[j] = specialValuesFloat[x];
+            fp2[j] = specialValuesFloat[y];
+            if( ++x >= specialValuesFloatCount )
+            {
+                x = 0;
+                y++;
+                if( y >= specialValuesFloatCount )
+                    break;
+            }
+        }
+    }
+
+    //Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        p[j] = genrand_int32(d);
+        p2[j] = genrand_int32(d);
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject failed! err: %d\n", error );
+            goto exit;
+        }
+
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
+
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+
+    //Calculate the correctly rounded reference result
+    r = (cl_int *)gOut_Ref  + thread_id * buffer_elements;
+    s = (float *)gIn  + thread_id * buffer_elements;
+    s2 = (float *)gIn2  + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = func.i_ff( s[j], s2[j] );
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+
+    // Wait for the last buffer
+    out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        goto exit;
+    }
+
+    //Verify data
+    t = (cl_int *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        // Vector size index 0 is compared against the reference value itself.
+        cl_int *q = out[0];
+
+        if( gMinVectorSizeIndex == 0 && t[j] != q[j] )
+        {
+            // In flush-to-zero mode a subnormal input may have been replaced by
+            // +0 or -0, so accept the reference result for any of those inputs.
+            if( ftz )
+            {
+                if( IsFloatSubnormal( s[j])  )
+                {
+                    if( IsFloatSubnormal( s2[j] )  )
+                    {
+                        int correct = func.i_ff( 0.0f, 0.0f );
+                        int correct2 = func.i_ff( 0.0f, -0.0f );
+                        int correct3 = func.i_ff( -0.0f, 0.0f );
+                        int correct4 = func.i_ff( -0.0f, -0.0f );
+
+                        if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] )
+                            continue;
+                    }
+                    else
+                    {
+                        int correct = func.i_ff( 0.0f, s2[j] );
+                        int correct2 = func.i_ff( -0.0f, s2[j] );
+                        if( correct == q[j] || correct2 == q[j]  )
+                            continue;
+                    }
+                }
+                else if( IsFloatSubnormal( s2[j] ) )
+                {
+                    int correct = func.i_ff( s[j], 0.0f );
+                    int correct2 = func.i_ff( s[j], -0.0f );
+                    if( correct == q[j] || correct2 == q[j]  )
+                        continue;
+                }
+
+            }
+
+            uint32_t err = t[j] - q[j];
+            if( q[j] > t[j] )
+                err = q[j] - t[j];
+            vlog_error( "\nERROR: %s: %d ulp error at {%a, %a}: *0x%8.8x vs. 0x%8.8x (index: %d)\n", name, err, ((float*) s)[j], ((float*) s2)[j], t[j], q[j], j );
+            error = -1;
+            goto exit;
+        }
+
+        // The remaining vector sizes are compared against the NEGATED reference
+        // (presumably because vector relational built-ins report true as -1 -- confirm).
+        for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ )
+        {
+            q = out[k];
+            // If we aren't getting the correctly rounded result
+            if( -t[j] != q[j] )
+            {
+                // Same flush-to-zero allowance as above, with negated expectations.
+                if( ftz )
+                {
+                    if( IsFloatSubnormal( s[j])  )
+                    {
+                        if( IsFloatSubnormal( s2[j] )  )
+                        {
+                            int correct = -func.i_ff( 0.0f, 0.0f );
+                            int correct2 = -func.i_ff( 0.0f, -0.0f );
+                            int correct3 = -func.i_ff( -0.0f, 0.0f );
+                            int correct4 = -func.i_ff( -0.0f, -0.0f );
+
+                            if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] )
+                                continue;
+                        }
+                        else
+                        {
+                            int correct = -func.i_ff( 0.0f, s2[j] );
+                            int correct2 = -func.i_ff( -0.0f, s2[j] );
+                            if( correct == q[j] || correct2 == q[j]  )
+                                continue;
+                        }
+                    }
+                    else if( IsFloatSubnormal( s2[j] ) )
+                    {
+                        int correct = -func.i_ff( s[j], 0.0f );
+                        int correct2 = -func.i_ff( s[j], -0.0f );
+                        if( correct == q[j] || correct2 == q[j]  )
+                            continue;
+                    }
+
+                }
+                cl_uint err = -t[j] - q[j];
+                if( q[j] > -t[j] )
+                    err = q[j] + t[j];
+                vlog_error( "\nERROR: %s%s: %d ulp error at {%a, %a}: *0x%8.8x vs. 0x%8.8x (index: %d)\n", name, sizeNames[k], err, ((float*) s)[j], ((float*) s2)[j], -t[j], q[j], j );
+                error = -1;
+                goto exit;
+            }
+        }
+    }
+
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+
+    // Progress output: one line (verbose) or one dot each time base crosses a 2^28 boundary.
+    if( 0 == ( base & 0x0fffffff) )
+    {
+       if (gVerboseBruteForce)
+       {
+           vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount);
+       } else
+       {
+          vlog("." );
+       }
+       fflush(stdout);
+    }
+
+exit:
+    return error;
+}
+// A table of more difficult cases to get right.
+// The first half holds negative values, the second half the same magnitudes
+// with a positive sign: NaN, infinity, the largest finite value, values on
+// and next to powers of two (2^64, 2^63, 2^32, 2^31), small integers and
+// half-way points with their neighbours, the smallest normal value, a range
+// of subnormals, and zero.
+static const double specialValuesDouble[] = {
+    -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100.,  -4.0, -3.5,
+    -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
+
+    +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
+    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100.,  +4.0, +3.5,
+    +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),  MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
+};
+// Number of entries in the specialValuesDouble table (constant, like specialValuesFloatCount).
+static const size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] );
+// Worker job for the double test; run through ThreadPool_Do with a TestInfo* as the last argument.
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p );
+// Host-side driver for the double variant of a two-input test whose kernel
+// produces an int result.
+//
+// f - description of the function under test; name, nameInCode and ftz are
+//     read here.
+// d - random number generator used to seed the per-thread generators and to
+//     fill the timing inputs.
+//
+// Steps:
+//   1. size the work from the thread count (and the wimpy-mode settings),
+//   2. create per-thread sub-buffers, command queues and RNG state,
+//   3. build one kernel per vector size per thread,
+//   4. run TestDouble over every job through ThreadPool_Do,
+//   5. optionally time each vector size on gQueue,
+//   6. release everything that was created.
+//
+// Returns 0 when every step succeeded, otherwise the first error reported.
+// All failure paths leave through the exit label so that whatever has been
+// created so far is released.
+int TestMacro_Int_Double_Double(const Func *f, MTdata d)
+{
+    TestInfo    test_info;
+    cl_int      error = CL_SUCCESS;
+    size_t      i, j;
+
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode)
+    {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
+    }
+
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ftz = f->ftz || gForceFTZ;
+
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    // Zeroed so that the release code at exit can run on a partially initialized array.
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        // Each thread gets its own equally sized slice of the global buffers.
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) };
+
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+
+        test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        // Check the buffer that was just created (inBuf2), not inBuf again.
+        if( error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            // Vendor note carried over from the original: the sub-buffer's
+            // read-write flags must be compatible with the parent buffer.
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+
+        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+    }
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+
+    // Run the kernels
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
+
+        if( error )
+            goto exit;
+
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+
+    if( gMeasureTimes )
+    {
+        //Init input arrays
+        uint64_t *p = (uint64_t *)gIn;
+        uint64_t *p2 = (uint64_t *)gIn2;
+        for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ )
+        {
+            // Two 32-bit draws are combined into each 64-bit input pattern.
+            p[j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
+            p2[j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
+        }
+
+        // On failure leave through exit (not a bare return) so nothing is leaked.
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            goto exit;
+        }
+
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
+
+            // Timing runs on the whole buffers, using the kernel built for thread 0.
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+
+            // Report the best run unless average times were requested.
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        // The original followed this loop with a second loop over the remaining
+        // j values; j already equals gMaxVectorSizeIndex here, so it never ran
+        // and has been dropped.
+    }
+
+    vlog( "\n" );
+
+exit:
+    // Release everything created above; members never created are still zero
+    // from the memset calls.
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            free_mtdata(test_info.tinfo[i].d);
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            clReleaseMemObject(test_info.tinfo[i].inBuf2);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+
+        free( test_info.tinfo );
+    }
+
+    return error;
+}
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
+    const TestInfo *job = (const TestInfo *) data;
+    size_t      buffer_elements = job->subBufferSize;
+    size_t      buffer_size = buffer_elements * sizeof( cl_double );
+    cl_uint     base = job_id * (cl_uint) job->step;
+    ThreadInfo  *tinfo = job->tinfo + thread_id;
+    dptr        dfunc = job->f->dfunc;
+    int         ftz = job->ftz;
+    MTdata      d = tinfo->d;
+    cl_uint     j, k;
+    cl_int      error;
+    const char  *name = job->f->name;
+    cl_long     *t,*r;
+    cl_double   *s,*s2;
+    Force64BitFPUPrecision();
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_long  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+    //Init input array
+    double *p = (double *)gIn + thread_id * buffer_elements;
+    double *p2 = (double *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    if( job_id <= (cl_uint)indx )
+    { // test edge cases
+        uint32_t x, y;
+    x = (job_id * buffer_elements) % specialValuesDoubleCount;
+    y = (job_id * buffer_elements) / specialValuesDoubleCount;
+        for( ; j < buffer_elements; j++ )
+        {
+            p[j] = specialValuesDouble[x];
+            p2[j] = specialValuesDouble[y];
+            if( ++x >= specialValuesDoubleCount )
+            {
+                x = 0;
+                y++;
+                if( y >= specialValuesDoubleCount )
+                    break;
+            }
+        }
+    }
+    //Init any remaining values.
+    for( ; j < buffer_elements; j++ )
+    {
+        ((cl_ulong*)p)[j] = genrand_int64(d);
+        ((cl_ulong*)p2)[j] = genrand_int64(d);
+    }
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        goto exit;
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            goto exit;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            goto exit;
+        }
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error );
+            goto exit;
+        }
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            goto exit;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+    //Calculate the correctly rounded reference result
+    r = (cl_long *)gOut_Ref  + thread_id * buffer_elements;
+    s = (cl_double *)gIn  + thread_id * buffer_elements;
+    s2 = (cl_double *)gIn2  + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = dfunc.i_ff( s[j], s2[j] );
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            goto exit;
+        }
+    }
+    // Wait for the last buffer
+    out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        goto exit;
+    }
+    //Verify data
+    t = (cl_long *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        cl_long *q = (cl_long *) out[0];
+        // If we aren't getting the correctly rounded result
+        if( gMinVectorSizeIndex == 0 && t[j] != q[j] )
+        {
+            if( ftz )
+            {
+                if( IsDoubleSubnormal( s[j])  )
+                {
+                    if( IsDoubleSubnormal( s2[j] )  )
+                    {
+                        int64_t correct = dfunc.i_ff( 0.0f, 0.0f );
+                        int64_t correct2 = dfunc.i_ff( 0.0f, -0.0f );
+                        int64_t correct3 = dfunc.i_ff( -0.0f, 0.0f );
+                        int64_t correct4 = dfunc.i_ff( -0.0f, -0.0f );
+                        if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] )
+                            continue;
+                    }
+                    else
+                    {
+                        int64_t correct = dfunc.i_ff( 0.0f, s2[j] );
+                        int64_t correct2 = dfunc.i_ff( -0.0f, s2[j] );
+                        if( correct == q[j] || correct2 == q[j]  )
+                            continue;
+                    }
+                }
+                else if( IsDoubleSubnormal( s2[j] ) )
+                {
+                    int64_t correct = dfunc.i_ff( s[j], 0.0f );
+                    int64_t correct2 = dfunc.i_ff( s[j], -0.0f );
+                    if( correct == q[j] || correct2 == q[j]  )
+                        continue;
+                }
+            }
+            uint64_t err = t[j] - q[j];
+            if( q[j] > t[j] )
+                err = q[j] - t[j];
+            vlog_error( "\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld vs. %lld  (index: %d)\n", name, err, ((double*) s)[j], ((double*) s2)[j], t[j], q[j], j );
+            error = -1;
+            goto exit;
+        }
+        for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ )
+        {
+            q = (cl_long*) out[k];
+            // If we aren't getting the correctly rounded result
+            if( -t[j] != q[j] )
+            {
+                if( ftz )
+                {
+                    if( IsDoubleSubnormal( s[j])  )
+                    {
+                        if( IsDoubleSubnormal( s2[j] )  )
+                        {
+                            int64_t correct = -dfunc.i_ff( 0.0f, 0.0f );
+                            int64_t correct2 = -dfunc.i_ff( 0.0f, -0.0f );
+                            int64_t correct3 = -dfunc.i_ff( -0.0f, 0.0f );
+                            int64_t correct4 = -dfunc.i_ff( -0.0f, -0.0f );
+                            if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] )
+                                continue;
+                        }
+                        else
+                        {
+                            int64_t correct = -dfunc.i_ff( 0.0f, s2[j] );
+                            int64_t correct2 = -dfunc.i_ff( -0.0f, s2[j] );
+                            if( correct == q[j] || correct2 == q[j]  )
+                                continue;
+                        }
+                    }
+                    else if( IsDoubleSubnormal( s2[j] ) )
+                    {
+                        int64_t correct = -dfunc.i_ff( s[j], 0.0f );
+                        int64_t correct2 = -dfunc.i_ff( s[j], -0.0f );
+                        if( correct == q[j] || correct2 == q[j]  )
+                            continue;
+                    }
+                }
+                uint64_t err = -t[j] - q[j];
+                if( q[j] > -t[j] )
+                    err = q[j] + t[j];
+                vlog_error( "\nERROR: %sD%s: %lld ulp error at {%.13la, %.13la}: *%lld vs. %lld  (index: %d)\n", name, sizeNames[k], err, ((double*) s)[j], ((double*) s2)[j], -t[j], q[j], j );
+                error = -1;
+                goto exit;
+            }
+        }
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+    if( 0 == ( base & 0x0fffffff) )
+    {
+       if (gVerboseBruteForce)
+       {
+           vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount);
+       } else
+       {
+          vlog("." );
+       }
+       fflush(stdout);
+    }
+    return error;
diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp
deleted file mode 100644
index d09915f..0000000
--- a/test_conformance/math_brute_force/macro_binary_double.cpp
+++ /dev/null
@@ -1,737 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global long",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global long* out, __global double* in, __global double* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-        "       double3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       long3 l0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       vstore3( l0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 f0;\n"
-        "       double3 f1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       long3 l0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = l0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = l0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem inBuf2; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    MTdata d;
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-typedef struct TestInfo
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    int ftz; // non-zero if running in flush to zero mode
-} TestInfo;
-// A table of more difficult cases to get right
-static const double specialValues[] = {
-    -NAN,
-    -DBL_MAX,
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22),
-    -1000.0,
-    -100.0,
-    -4.0,
-    -3.5,
-    -3.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51),
-    -2.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51),
-    -2.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52),
-    -1.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    -1.0,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53),
-    -0.5,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54),
-    -0.25,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
-    -DBL_MIN,
-    MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
-    -0.0,
-    +NAN,
-    +DBL_MAX,
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
-    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8),
-    MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21),
-    MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22),
-    +1000.0,
-    +100.0,
-    +4.0,
-    +3.5,
-    +3.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51),
-    +2.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51),
-    +2.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
-    +1.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    +1.0,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53),
-    +0.5,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54),
-    +0.25,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
-    +DBL_MIN,
-    MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
-    +0.0,
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
-    TestInfo test_info;
-    cl_int error;
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_double));
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-    test_info.f = f;
-    test_info.ftz = f->ftz || gForceFTZ;
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (size_t i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_double),
-            test_info.subBufferSize * sizeof(cl_double)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-        if (error) goto exit;
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-    }
-    vlog("\n");
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-        free(test_info.tinfo);
-    }
-    return error;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_double);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    dptr dfunc = job->f->dfunc;
-    int ftz = job->ftz;
-    MTdata d = tinfo->d;
-    cl_int error;
-    const char *name = job->f->name;
-    cl_long *t;
-    cl_long *r;
-    cl_double *s;
-    cl_double *s2;
-    Force64BitFPUPrecision();
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_long *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_long *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-    // Init input array
-    double *p = (double *)gIn + thread_id * buffer_elements;
-    double *p2 = (double *)gIn2 + thread_id * buffer_elements;
-    cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
-        uint32_t x, y;
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-        for (; idx < buffer_elements; idx++)
-        {
-            p[idx] = specialValues[x];
-            p2[idx] = specialValues[y];
-            if (++x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesCount) break;
-            }
-        }
-    }
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        ((cl_ulong *)p)[idx] = genrand_int64(d);
-        ((cl_ulong *)p2)[idx] = genrand_int64(d);
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-    // Calculate the correctly rounded reference result
-    r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
-    s = (cl_double *)gIn + thread_id * buffer_elements;
-    s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++) r[j] = dfunc.i_ff(s[j], s2[j]);
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_long *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-    // Verify data
-    t = (cl_long *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        cl_long *q = out[0];
-        // If we aren't getting the correctly rounded result
-        if (gMinVectorSizeIndex == 0 && t[j] != q[j])
-        {
-            // If we aren't getting the correctly rounded result
-            if (ftz)
-            {
-                if (IsDoubleSubnormal(s[j]))
-                {
-                    if (IsDoubleSubnormal(s2[j]))
-                    {
-                        int64_t correct = dfunc.i_ff(0.0f, 0.0f);
-                        int64_t correct2 = dfunc.i_ff(0.0f, -0.0f);
-                        int64_t correct3 = dfunc.i_ff(-0.0f, 0.0f);
-                        int64_t correct4 = dfunc.i_ff(-0.0f, -0.0f);
-                        if (correct == q[j] || correct2 == q[j]
-                            || correct3 == q[j] || correct4 == q[j])
-                            continue;
-                    }
-                    else
-                    {
-                        int64_t correct = dfunc.i_ff(0.0f, s2[j]);
-                        int64_t correct2 = dfunc.i_ff(-0.0f, s2[j]);
-                        if (correct == q[j] || correct2 == q[j]) continue;
-                    }
-                }
-                else if (IsDoubleSubnormal(s2[j]))
-                {
-                    int64_t correct = dfunc.i_ff(s[j], 0.0f);
-                    int64_t correct2 = dfunc.i_ff(s[j], -0.0f);
-                    if (correct == q[j] || correct2 == q[j]) continue;
-                }
-            }
-            cl_ulong err = t[j] - q[j];
-            if (q[j] > t[j]) err = q[j] - t[j];
-            vlog_error("\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld "
-                       "vs. %lld  (index: %d)\n",
-                       name, err, ((double *)s)[j], ((double *)s2)[j], t[j],
-                       q[j], j);
-            error = -1;
-            goto exit;
-        }
-        for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
-        {
-            q = (cl_long *)out[k];
-            // If we aren't getting the correctly rounded result
-            if (-t[j] != q[j])
-            {
-                if (ftz)
-                {
-                    if (IsDoubleSubnormal(s[j]))
-                    {
-                        if (IsDoubleSubnormal(s2[j]))
-                        {
-                            int64_t correct = -dfunc.i_ff(0.0f, 0.0f);
-                            int64_t correct2 = -dfunc.i_ff(0.0f, -0.0f);
-                            int64_t correct3 = -dfunc.i_ff(-0.0f, 0.0f);
-                            int64_t correct4 = -dfunc.i_ff(-0.0f, -0.0f);
-                            if (correct == q[j] || correct2 == q[j]
-                                || correct3 == q[j] || correct4 == q[j])
-                                continue;
-                        }
-                        else
-                        {
-                            int64_t correct = -dfunc.i_ff(0.0f, s2[j]);
-                            int64_t correct2 = -dfunc.i_ff(-0.0f, s2[j]);
-                            if (correct == q[j] || correct2 == q[j]) continue;
-                        }
-                    }
-                    else if (IsDoubleSubnormal(s2[j]))
-                    {
-                        int64_t correct = -dfunc.i_ff(s[j], 0.0f);
-                        int64_t correct2 = -dfunc.i_ff(s[j], -0.0f);
-                        if (correct == q[j] || correct2 == q[j]) continue;
-                    }
-                }
-                cl_ulong err = -t[j] - q[j];
-                if (q[j] > -t[j]) err = q[j] + t[j];
-                vlog_error("\nERROR: %sD%s: %lld ulp error at {%.13la, "
-                           "%.13la}: *%lld vs. %lld  (index: %d)\n",
-                           name, sizeNames[k], err, ((double *)s)[j],
-                           ((double *)s2)[j], -t[j], q[j], j);
-                error = -1;
-                goto exit;
-            }
-        }
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp
deleted file mode 100644
index c530cda..0000000
--- a/test_conformance/math_brute_force/macro_binary_float.cpp
+++ /dev/null
@@ -1,726 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global int",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global int* out, __global float* in, __global float* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       int3 i0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       vstore3( i0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       int3 i0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem inBuf2; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    MTdata d;
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-typedef struct TestInfo
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    int ftz; // non-zero if running in flush to zero mode
-} TestInfo;
-// A table of more difficult cases to get right
-static const float specialValues[] = {
-    -NAN,
-    -FLT_MAX,
-    MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
-    MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
-    MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
-    MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
-    MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8),
-    MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32),
-    MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7),
-    MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31),
-    MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
-    -1000.f,
-    -100.f,
-    -4.0f,
-    -3.5f,
-    -3.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
-    -2.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
-    -2.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
-    -1.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
-    -1.0f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25),
-    -0.5f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26),
-    -0.25f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
-    -FLT_MIN,
-    MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
-    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
-    MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
-    MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
-    MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
-    -0.0f,
-    +NAN,
-    +FLT_MAX,
-    MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
-    MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
-    MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
-    MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
-    MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
-    MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8),
-    MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32),
-    MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
-    MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7),
-    MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31),
-    MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
-    +1000.f,
-    +100.f,
-    +4.0f,
-    +3.5f,
-    +3.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
-    2.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
-    +2.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
-    1.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
-    +1.0f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25),
-    +0.5f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
-    MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26),
-    +0.25f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
-    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
-    +FLT_MIN,
-    MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
-    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
-    MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
-    MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
-    MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
-    +0.0f,
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
-    TestInfo test_info;
-    cl_int error;
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_float));
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-    test_info.f = f;
-    test_info.ftz =
-        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_float),
-            test_info.subBufferSize * sizeof(cl_float)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-        if (error) goto exit;
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-    }
-    vlog("\n");
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-        free(test_info.tinfo);
-    }
-    return error;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_float);
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    fptr func = job->f->func;
-    int ftz = job->ftz;
-    MTdata d = tinfo->d;
-    cl_int error;
-    const char *name = job->f->name;
-    cl_int *t = 0;
-    cl_int *r = 0;
-    cl_float *s = 0;
-    cl_float *s2 = 0;
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_int *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_int *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-    // Init input array
-    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
-    cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
-    cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
-    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
-    if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
-        float *fp = (float *)p;
-        float *fp2 = (float *)p2;
-        uint32_t x, y;
-        x = (job_id * buffer_elements) % specialValuesCount;
-        y = (job_id * buffer_elements) / specialValuesCount;
-        for (; idx < buffer_elements; idx++)
-        {
-            fp[idx] = specialValues[x];
-            fp2[idx] = specialValues[y];
-            ++x;
-            if (x >= specialValuesCount)
-            {
-                x = 0;
-                y++;
-                if (y >= specialValuesCount) break;
-            }
-        }
-    }
-    // Init any remaining values.
-    for (; idx < buffer_elements; idx++)
-    {
-        p[idx] = genrand_int32(d);
-        p2[idx] = genrand_int32(d);
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
-                                      buffer_size, p2, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
-        }
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            goto exit;
-        }
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-    // Calculate the correctly rounded reference result
-    r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
-    s = (float *)gIn + thread_id * buffer_elements;
-    s2 = (float *)gIn2 + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++) r[j] = func.i_ff(s[j], s2[j]);
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_int *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            goto exit;
-        }
-    }
-    // Verify data
-    t = (cl_int *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        cl_int *q = out[0];
-        if (gMinVectorSizeIndex == 0 && t[j] != q[j])
-        {
-            if (ftz)
-            {
-                if (IsFloatSubnormal(s[j]))
-                {
-                    if (IsFloatSubnormal(s2[j]))
-                    {
-                        int correct = func.i_ff(0.0f, 0.0f);
-                        int correct2 = func.i_ff(0.0f, -0.0f);
-                        int correct3 = func.i_ff(-0.0f, 0.0f);
-                        int correct4 = func.i_ff(-0.0f, -0.0f);
-                        if (correct == q[j] || correct2 == q[j]
-                            || correct3 == q[j] || correct4 == q[j])
-                            continue;
-                    }
-                    else
-                    {
-                        int correct = func.i_ff(0.0f, s2[j]);
-                        int correct2 = func.i_ff(-0.0f, s2[j]);
-                        if (correct == q[j] || correct2 == q[j]) continue;
-                    }
-                }
-                else if (IsFloatSubnormal(s2[j]))
-                {
-                    int correct = func.i_ff(s[j], 0.0f);
-                    int correct2 = func.i_ff(s[j], -0.0f);
-                    if (correct == q[j] || correct2 == q[j]) continue;
-                }
-            }
-            uint32_t err = t[j] - q[j];
-            if (q[j] > t[j]) err = q[j] - t[j];
-            vlog_error("\nERROR: %s: %d ulp error at {%a, %a}: *0x%8.8x vs. "
-                       "0x%8.8x (index: %d)\n",
-                       name, err, ((float *)s)[j], ((float *)s2)[j], t[j], q[j],
-                       j);
-            error = -1;
-            goto exit;
-        }
-        for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
-        {
-            q = out[k];
-            // If we aren't getting the correctly rounded result
-            if (-t[j] != q[j])
-            {
-                if (ftz)
-                {
-                    if (IsFloatSubnormal(s[j]))
-                    {
-                        if (IsFloatSubnormal(s2[j]))
-                        {
-                            int correct = -func.i_ff(0.0f, 0.0f);
-                            int correct2 = -func.i_ff(0.0f, -0.0f);
-                            int correct3 = -func.i_ff(-0.0f, 0.0f);
-                            int correct4 = -func.i_ff(-0.0f, -0.0f);
-                            if (correct == q[j] || correct2 == q[j]
-                                || correct3 == q[j] || correct4 == q[j])
-                                continue;
-                        }
-                        else
-                        {
-                            int correct = -func.i_ff(0.0f, s2[j]);
-                            int correct2 = -func.i_ff(-0.0f, s2[j]);
-                            if (correct == q[j] || correct2 == q[j]) continue;
-                        }
-                    }
-                    else if (IsFloatSubnormal(s2[j]))
-                    {
-                        int correct = -func.i_ff(s[j], 0.0f);
-                        int correct2 = -func.i_ff(s[j], -0.0f);
-                        if (correct == q[j] || correct2 == q[j]) continue;
-                    }
-                }
-                cl_uint err = -t[j] - q[j];
-                if (q[j] > -t[j]) err = q[j] + t[j];
-                vlog_error("\nERROR: %s%s: %d ulp error at {%a, %a}: *0x%8.8x "
-                           "vs. 0x%8.8x (index: %d)\n",
-                           name, sizeNames[k], err, ((float *)s)[j],
-                           ((float *)s2)[j], -t[j], q[j], j);
-                error = -1;
-                goto exit;
-            }
-        }
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/macro_unary.cpp b/test_conformance/math_brute_force/macro_unary.cpp
new file mode 100644
index 0000000..c8d125b
--- /dev/null
+++ b/test_conformance/math_brute_force/macro_unary.cpp
@@ -0,0 +1,989 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "Utility.h"
+#include <string.h>
+#include "FunctionList.h"
+// Host-side entry points for the float and double variants of this test;
+// both are defined further down in this file.
+int TestMacro_Int_Float(const Func *f, MTdata);
+int TestMacro_Int_Double(const Func *f, MTdata);
+// Table entry that exposes the two entry points above under the name
+// "macro_unary".
+extern const vtbl _macro_unary = { "macro_unary", TestMacro_Int_Float,
+                                   TestMacro_Int_Double };
+// Forward declarations of the kernel-source builders defined below.
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+// Assemble the OpenCL C source for the float variant of the test kernel and
+// hand it to MakeKernels().
+//
+//   name          function name spliced into the kernel source as the call
+//                 applied to each input
+//   vectorSize    index into sizeNames[] / sizeValues[] selecting the vector
+//                 width of the kernel
+//   kernel_count, k, p
+//                 forwarded unchanged to MakeKernels()
+//
+// Returns whatever MakeKernels() returns.
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+    // Generic source: each work-item applies the function to element i of
+    // "in" (float vector) and stores the result in element i of "out" (int vector).
+    const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i] );\n"
+                            "}\n"
+                        };
+    // Source used when the vector width is 3: the buffers are addressed as
+    // scalars through vload3/vstore3, and the last work-item handles its one or
+    // two remaining elements individually, padding unused lanes with 0xdead.
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global float* in)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       float3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       int3 i0 = ", name, "( f0 );\n"
+                            "       vstore3( i0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       int3 i0;\n"
+                            "       float3 f0;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               f0 = (float3)( in[3*i], 0xdead, 0xdead ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               f0 = (float3)( in[3*i], in[3*i+1], 0xdead ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       i0 = ", name, "( f0 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = i0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = i0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    // Default to the generic source; switch to the 3-wide source when the
+    // selected vector width is 3.
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    // Kernel entry-point name: "math_kernel" followed by the size suffix,
+    // matching the name used in the source strings above.
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+// Assemble the OpenCL C source for the double variant of the test kernel and
+// hand it to MakeKernels(). Both sources start by enabling cl_khr_fp64.
+//
+//   name          function name spliced into the kernel source as the call
+//                 applied to each input
+//   vectorSize    index into sizeNames[] / sizeValues[] selecting the vector
+//                 width of the kernel
+//   kernel_count, k, p
+//                 forwarded unchanged to MakeKernels()
+//
+// Returns whatever MakeKernels() returns.
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+    // Generic source: each work-item applies the function to element i of
+    // "in" (double vector) and stores the result in element i of "out" (long vector).
+    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global long", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i] );\n"
+                            "}\n"
+                        };
+    // Source used when the vector width is 3: the buffers are addressed as
+    // scalars through vload3/vstore3, and the last work-item handles its one or
+    // two remaining elements individually, padding unused lanes with NAN.
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global long* out, __global double* in)\n"
+                        "{\n"
+                        "   size_t i = get_global_id(0);\n"
+                        "   if( i + 1 < get_global_size(0) )\n"
+                        "   {\n"
+                        "       double3 d0 = vload3( 0, in + 3 * i );\n"
+                        "       long3 l0 = ", name, "( d0 );\n"
+                        "       vstore3( l0, 0, out + 3*i );\n"
+                        "   }\n"
+                        "   else\n"
+                        "   {\n"
+                        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                        "       double3 d0;\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 1:\n"
+                        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
+                        "               break;\n"
+                        "           case 0:\n"
+                        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+                        "               break;\n"
+                        "       }\n"
+                        "       long3 l0 = ", name, "( d0 );\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 0:\n"
+                        "               out[3*i+1] = l0.y; \n"
+                        "               // fall through\n"
+                        "           case 1:\n"
+                        "               out[3*i] = l0.x; \n"
+                        "               break;\n"
+                        "       }\n"
+                        "   }\n"
+                        "}\n"
+                    };
+    // Default to the generic source; switch to the 3-wide source when the
+    // selected vector width is 3.
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    // Kernel entry-point name: "math_kernel" followed by the size suffix,
+    // matching the name used in the source strings above.
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+// Arguments handed to the BuildKernel_FloatFn / BuildKernel_DoubleFn callbacks.
+typedef struct BuildKernelInfo
+    cl_uint     offset;            // the first vector size to build
+    cl_uint     kernel_count;      // forwarded to the builder as kernel_count
+    cl_kernel   **kernels;         // indexed by vector size index; the selected entry is forwarded as the kernel array
+    cl_program  *programs;         // indexed by vector size index; the selected element receives the program
+    const char  *nameInCode;       // function name forwarded to the builder as "name"
+// Callback passed to ThreadPool_Do(): builds the float kernels for vector size
+// index (info->offset + job_id). thread_id is unused; p points to a
+// BuildKernelInfo. Returns the result of BuildKernel().
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    // Jobs are numbered from 0, so shift by the first vector size index to build.
+    cl_uint i = info->offset + job_id;
+    return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i );
+// Callback passed to ThreadPool_Do(): builds the double kernels for vector size
+// index (info->offset + job_id). thread_id is unused; p points to a
+// BuildKernelInfo. Returns the result of BuildKernelDouble().
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    // Jobs are numbered from 0, so shift by the first vector size index to build.
+    cl_uint i = info->offset + job_id;
+    return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i );
+//Thread specific data for a worker thread
+// The test entry points allocate one of these per worker thread (see
+// TestInfo::tinfo) and fill it with sub-buffers of gInBuffer / gOutBuffer[]
+// plus a dedicated command queue.
+typedef struct ThreadInfo
+    cl_mem      inBuf;                              // input buffer for the thread
+    cl_mem      outBuf[ VECTOR_SIZE_COUNT ];        // output buffers for the thread
+    cl_command_queue tQueue;                        // per thread command queue to improve performance
+// Shared description of one test run. It is filled in by the
+// TestMacro_Int_* entry points and passed (as void *) to the TestFloat /
+// TestDouble workers through ThreadPool_Do().
+typedef struct TestInfo
+    size_t      subBufferSize;                      // Size of the sub-buffer in elements
+    const Func  *f;                                 // A pointer to the function info
+    cl_program  programs[ VECTOR_SIZE_COUNT ];      // programs for various vector sizes
+    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
+    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
+    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
+    cl_uint     step;                               // step between each chunk and the next.
+    cl_uint     scale;                              // stride between individual test values
+    int         ftz;                                // non-zero if running in flush to zero mode
+// Worker run by ThreadPool_Do() for each job of the float test (defined below).
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );
+// Run the unary macro test for float inputs.
+//
+//   f   describes the function under test (name, nameInCode, ftz, ...)
+//   d   random-number state, used only to fill the input buffer for timing
+//
+// Creates per-thread sub-buffers, command queues and kernels, runs the
+// correctness jobs unless gSkipCorrectnessTesting is set, times the kernels
+// when gMeasureTimes is set, then releases everything it created.
+// Returns the last error code recorded (CL_SUCCESS when nothing failed).
+int TestMacro_Int_Float(const Func *f, MTdata d)
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode )
+    {
+        // Wimpy mode: smaller buffers and a larger stride between tested values
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        // Enough jobs to walk the whole 32-bit input space in "step"-sized chunks
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+    test_info.f = f;
+    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    // Give every worker thread its own slice of the input/output buffers and
+    // its own command queue.
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+    }
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
+        if( error )
+            goto exit;
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input array
+        cl_uint *p = (cl_uint *)gIn;
+        for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+            p[j] = genrand_int32(d);
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            // Leave through the common cleanup path (like every other failure
+            // in this function) so the kernels, programs, sub-buffers and
+            // queues created above are released instead of leaked.
+            goto exit;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;    // BUFFER_SIZE / vectorSize  rounded up
+            // Timing uses thread 0's kernel on the full (non-sub) buffers
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            // Report either the average or the best of the timed runs
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+    vlog( "\n" );
+    // Cleanup: release programs, per-thread kernels, sub-buffers and queues
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+        free( test_info.tinfo );
+    }
+    return error;
+// Worker for one job of the float test, run through ThreadPool_Do().
+//
+//   job_id     selects the chunk of input values: the first value tested is
+//              job_id * job->step
+//   thread_id  selects the per-thread buffers, queue and kernels
+//   data       the shared TestInfo for this run
+//
+// Fills the thread's input slice, runs the kernel for every enabled vector
+// size, computes the host reference and compares. Returns CL_SUCCESS, an
+// OpenCL error code, or -1 on a result mismatch.
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
+    const TestInfo *job = (const TestInfo *) data;
+    size_t  buffer_elements = job->subBufferSize;
+    size_t  buffer_size = buffer_elements * sizeof( cl_float );
+    cl_uint scale = job->scale;
+    cl_uint base = job_id * (cl_uint) job->step;
+    ThreadInfo *tinfo = job->tinfo + thread_id;
+    fptr    func = job->f->func;
+    int     ftz = job->ftz;
+    cl_uint j, k;
+    cl_int error = CL_SUCCESS;
+    cl_int ret   = CL_SUCCESS;
+    const char *name = job->f->name;
+    // "signbit" uses a different member of the reference-function union
+    int signbit_test = 0;
+    if(!strcmp(name, "signbit"))
+        signbit_test = 1;
+    #define ref_func(s) ( signbit_test ? func.i_f_f( s ) : func.i_f( s ) )
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_int  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+    // Write the new values to the input array
+    cl_uint *p = (cl_uint*) gIn + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        p[j] = base + j * scale;
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        return error;
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            return error;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            return error;
+        }
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            // The failing call here is the unmap, so report it as such
+            vlog_error( "Error: clEnqueueUnmapMemObject failed! err: %d\n", error );
+            return error;
+        }
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            return error;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+    //Calculate the correctly rounded reference result
+    cl_int *r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
+    float *s = (float *)p;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = ref_func( s[j] );
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Wait for the last buffer
+    out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        return error;
+    }
+    //Verify data
+    // Scalar results (out[0]) are compared with the reference value t[j];
+    // results for the other vector sizes are compared with -t[j].
+    cl_int *t = (cl_int *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+        {
+            cl_int *q = out[0];
+            // If we aren't getting the correctly rounded result
+            if( gMinVectorSizeIndex == 0 && t[j] != q[j])
+            {
+                // In flush-to-zero mode a subnormal input may legitimately be
+                // treated as +0 or -0, so accept either of those results.
+                if( ftz )
+                {
+                    if( IsFloatSubnormal( s[j]) )
+                    {
+                        int correct = ref_func( +0.0f );
+                        int correct2 = ref_func( -0.0f );
+                        // NOTE(review): this `continue` advances the enclosing
+                        // k loop, so the vector-size checks below are not
+                        // reached for this element -- confirm this is intended.
+                        if( correct == q[j] || correct2 == q[j] )
+                            continue;
+                    }
+                }
+                uint32_t err = t[j] - q[j];
+                if( q[j] > t[j] )
+                    err = q[j] - t[j];
+                vlog_error( "\nERROR: %s: %d ulp error at %a: *%d vs. %d\n", name,  err, ((float*) s)[j], t[j], q[j] );
+                error = -1;
+                goto exit;
+            }
+            // This inner loop reuses k and leaves it at gMaxVectorSizeIndex,
+            // which also ends the enclosing k loop after one pass.
+            for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ )
+            {
+                q = out[k];
+                // If we aren't getting the correctly rounded result
+                if( -t[j] != q[j] )
+                {
+                    if( ftz )
+                    {
+                        if( IsFloatSubnormal( s[j]))
+                        {
+                            int correct = -ref_func( +0.0f );
+                            int correct2 = -ref_func( -0.0f );
+                            if( correct == q[j] || correct2 == q[j] )
+                                continue;
+                        }
+                    }
+                    uint32_t err = -t[j] - q[j];
+                    if( q[j] > -t[j] )
+                        err = q[j] + t[j];
+                    vlog_error( "\nERROR: %s%s: %d ulp error at %a: *%d vs. %d\n", name, sizeNames[k], err, ((float*) s)[j], -t[j], q[j] );
+                  error = -1;
+                  goto exit;
+                }
+            }
+        }
+    }
+    // Remember the verification outcome; "error" is reused by the unmap calls below
+    ret = error;
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+    if( (error = clFlush(tinfo->tQueue) ))
+    {
+        vlog( "clFlush 3 failed\n" );
+        return error;
+    }
+    // Progress output, emitted only when the low 28 bits of base are zero
+    if( 0 == ( base & 0x0fffffff) )
+    {
+       if (gVerboseBruteForce)
+       {
+           vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount);
+       } else
+       {
+          vlog("." );
+       }
+       fflush(stdout);
+    }
+    return ret;
+// Worker run by ThreadPool_Do() for each job of the double test (defined below).
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data );
+// Run the unary macro test for double inputs.
+//
+//   f   describes the function under test (name, nameInCode, ftz, ...)
+//   d   random-number state, used only to fill the input buffer for timing
+//
+// Creates per-thread sub-buffers, command queues and kernels, runs the
+// correctness jobs unless gSkipCorrectnessTesting is set, times the kernels
+// when gMeasureTimes is set, then releases everything it created.
+// Returns the last error code recorded (CL_SUCCESS when nothing failed).
+int TestMacro_Int_Double(const Func *f, MTdata d)
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode )
+    {
+        // Wimpy mode: smaller buffers and a larger stride between tested values
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        // Enough jobs to walk a 32-bit index space in "step"-sized chunks
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+    test_info.f = f;
+    test_info.ftz = f->ftz || gForceFTZ;
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    // Give every worker thread its own slice of the input/output buffers and
+    // its own command queue.
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            /* Qualcomm fix: end */
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                // The parent here is gOutBuffer[j], so name it in the message
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+    }
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+            goto exit;
+    }
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
+        if( error )
+            goto exit;
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input array
+        cl_ulong *p = (cl_ulong *)gIn;
+        for( j = 0; j < BUFFER_SIZE / sizeof( cl_double ); j++ )
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            // Leave through the common cleanup path (like every other failure
+            // in this function) so the kernels, programs, sub-buffers and
+            // queues created above are released instead of leaked.
+            goto exit;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
+            // Timing uses thread 0's kernel on the full (non-sub) buffers
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            // Report either the average or the best of the timed runs
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        // As written, j already equals gMaxVectorSizeIndex here, so this
+        // filler loop does not iterate.
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+    vlog( "\n" );
+    // Cleanup: release programs, per-thread kernels, sub-buffers and queues
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+        free( test_info.tinfo );
+    }
+    return error;
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
+    const TestInfo *job = (const TestInfo *) data;
+    size_t  buffer_elements = job->subBufferSize;
+    size_t  buffer_size = buffer_elements * sizeof( cl_double );
+    cl_uint scale = job->scale;
+    cl_uint base = job_id * (cl_uint) job->step;
+    ThreadInfo *tinfo = job->tinfo + thread_id;
+    dptr    dfunc = job->f->dfunc;
+    cl_uint j, k;
+    cl_int error;
+    int ftz = job->ftz;
+    const char *name = job->f->name;
+    Force64BitFPUPrecision();
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_long *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+    // Write the new values to the input array
+    cl_double *p = (cl_double*) gIn + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        p[j] = DoubleFromUInt32( base + j * scale);
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        return error;
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            return error;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            return error;
+        }
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error );
+            return error;
+        }
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            return error;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+    //Calculate the correctly rounded reference result
+    cl_long *r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
+    cl_double *s = (cl_double *)p;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = dfunc.i_f( s[j] );
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Wait for the last buffer
+    out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        return error;
+    }
+    //Verify data
+    cl_long *t = (cl_long *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        cl_long *q = out[0];
+        // If we aren't getting the correctly rounded result
+        if( gMinVectorSizeIndex == 0 && t[j] != q[j])
+        {
+            // If we aren't getting the correctly rounded result
+            if( ftz )
+            {
+                if( IsDoubleSubnormal( s[j]) )
+                {
+                    cl_long correct = dfunc.i_f( +0.0f );
+                    cl_long correct2 = dfunc.i_f( -0.0f );
+                    if( correct == q[j] || correct2 == q[j] )
+                        continue;
+                }
+            }
+            cl_ulong err = t[j] - q[j];
+            if( q[j] > t[j] )
+                err = q[j] - t[j];
+            vlog_error( "\nERROR: %sD: %zd ulp error at %.13la: *%zd vs. %zd\n", name,  err, ((double*) gIn)[j], t[j], q[j] );
+            return -1;
+        }
+        for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ )
+        {
+            q = out[k];
+            // If we aren't getting the correctly rounded result
+            if( -t[j] != q[j] )
+            {
+                if( ftz )
+                {
+                    if( IsDoubleSubnormal( s[j]))
+                    {
+                        int64_t correct = -dfunc.i_f( +0.0f );
+                        int64_t correct2 = -dfunc.i_f( -0.0f );
+                        if( correct == q[j] || correct2 == q[j] )
+                            continue;
+                    }
+                }
+                cl_ulong err = -t[j] - q[j];
+                if( q[j] > -t[j] )
+                    err = q[j] + t[j];
+                vlog_error( "\nERROR: %sD%s: %zd ulp error at %.13la: *%zd vs. %zd\n", name, sizeNames[k], err, ((double*) gIn)[j], -t[j], q[j] );
+                return -1;
+            }
+        }
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+    if( 0 == ( base & 0x0fffffff) )
+    {
+       if (gVerboseBruteForce)
+       {
+           vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount);
+       } else
+       {
+          vlog("." );
+       }
+       fflush(stdout);
+    }
+    return CL_SUCCESS;
diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp
deleted file mode 100644
index 00e65a2..0000000
--- a/test_conformance/math_brute_force/macro_unary_double.cpp
+++ /dev/null
@@ -1,508 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global long",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global long* out, __global double* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       long3 l0 = ",
-        name,
-        "( d0 );\n"
-        "       vstore3( l0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       long3 l0 = ",
-        name,
-        "( d0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = l0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = l0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-typedef struct TestInfo
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    int ftz; // non-zero if running in flush to zero mode
-} TestInfo;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
-    TestInfo test_info;
-    cl_int error;
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_double));
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-    test_info.f = f;
-    test_info.ftz = f->ftz || gForceFTZ;
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_double),
-            test_info.subBufferSize * sizeof(cl_double)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-    }
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-        if (error) goto exit;
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-    }
-    vlog("\n");
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-        free(test_info.tinfo);
-    }
-    return error;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_double);
-    cl_uint scale = job->scale;
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    dptr dfunc = job->f->dfunc;
-    int ftz = job->ftz;
-    cl_int error;
-    const char *name = job->f->name;
-    Force64BitFPUPrecision();
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_long *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_long *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-    // Write the new values to the input array
-    cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-        p[j] = DoubleFromUInt32(base + j * scale);
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        return error;
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            return error;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            return error;
-        }
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            return error;
-        }
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-    // Calculate the correctly rounded reference result
-    cl_long *r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
-    cl_double *s = (cl_double *)p;
-    for (size_t j = 0; j < buffer_elements; j++) r[j] = dfunc.i_f(s[j]);
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_long *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Verify data
-    cl_long *t = (cl_long *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        cl_long *q = out[0];
-        // If we aren't getting the correctly rounded result
-        if (gMinVectorSizeIndex == 0 && t[j] != q[j])
-        {
-            // If we aren't getting the correctly rounded result
-            if (ftz)
-            {
-                if (IsDoubleSubnormal(s[j]))
-                {
-                    cl_long correct = dfunc.i_f(+0.0f);
-                    cl_long correct2 = dfunc.i_f(-0.0f);
-                    if (correct == q[j] || correct2 == q[j]) continue;
-                }
-            }
-            cl_ulong err = t[j] - q[j];
-            if (q[j] > t[j]) err = q[j] - t[j];
-            vlog_error("\nERROR: %sD: %zd ulp error at %.13la: *%zd vs. %zd\n",
-                       name, err, ((double *)gIn)[j], t[j], q[j]);
-            return -1;
-        }
-        for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
-        {
-            q = out[k];
-            // If we aren't getting the correctly rounded result
-            if (-t[j] != q[j])
-            {
-                if (ftz)
-                {
-                    if (IsDoubleSubnormal(s[j]))
-                    {
-                        int64_t correct = -dfunc.i_f(+0.0f);
-                        int64_t correct2 = -dfunc.i_f(-0.0f);
-                        if (correct == q[j] || correct2 == q[j]) continue;
-                    }
-                }
-                cl_ulong err = -t[j] - q[j];
-                if (q[j] > -t[j]) err = q[j] + t[j];
-                vlog_error(
-                    "\nERROR: %sD%s: %zd ulp error at %.13la: *%zd vs. %zd\n",
-                    name, sizeNames[k], err, ((double *)gIn)[j], -t[j], q[j]);
-                return -1;
-            }
-        }
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-    return CL_SUCCESS;
diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp
deleted file mode 100644
index 3c1717a..0000000
--- a/test_conformance/math_brute_force/macro_unary_float.cpp
+++ /dev/null
@@ -1,523 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global int",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global int* out, __global float* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 i0 = ",
-        name,
-        "( f0 );\n"
-        "       vstore3( i0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       int3 i0;\n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], 0xdead, 0xdead ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], 0xdead ); \n"
-        "               break;\n"
-        "       }\n"
-        "       i0 = ",
-        name,
-        "( f0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-typedef struct TestInfo
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    int ftz; // non-zero if running in flush to zero mode
-} TestInfo;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
-    TestInfo test_info;
-    cl_int error;
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_float));
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-    test_info.f = f;
-    test_info.ftz =
-        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_float),
-            test_info.subBufferSize * sizeof(cl_float)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-    }
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-        if (error) goto exit;
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-    }
-    vlog("\n");
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-        free(test_info.tinfo);
-    }
-    return error;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_float);
-    cl_uint scale = job->scale;
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    fptr func = job->f->func;
-    int ftz = job->ftz;
-    cl_int error = CL_SUCCESS;
-    cl_int ret = CL_SUCCESS;
-    const char *name = job->f->name;
-    int signbit_test = 0;
-    if (!strcmp(name, "signbit")) signbit_test = 1;
-#define ref_func(s) (signbit_test ? func.i_f_f(s) : func.i_f(s))
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_int *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_int *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-    // Init input array
-    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++) p[j] = base + j * scale;
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        return error;
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            return error;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            return error;
-        }
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            return error;
-        }
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-    // Calculate the correctly rounded reference result
-    cl_int *r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
-    float *s = (float *)p;
-    for (size_t j = 0; j < buffer_elements; j++) r[j] = ref_func(s[j]);
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_int *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Verify data
-    cl_int *t = (cl_int *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            cl_int *q = out[0];
-            // If we aren't getting the correctly rounded result
-            if (gMinVectorSizeIndex == 0 && t[j] != q[j])
-            {
-                // If we aren't getting the correctly rounded result
-                if (ftz)
-                {
-                    if (IsFloatSubnormal(s[j]))
-                    {
-                        int correct = ref_func(+0.0f);
-                        int correct2 = ref_func(-0.0f);
-                        if (correct == q[j] || correct2 == q[j]) continue;
-                    }
-                }
-                uint32_t err = t[j] - q[j];
-                if (q[j] > t[j]) err = q[j] - t[j];
-                vlog_error("\nERROR: %s: %d ulp error at %a: *%d vs. %d\n",
-                           name, err, ((float *)s)[j], t[j], q[j]);
-                error = -1;
-                goto exit;
-            }
-            for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex;
-                 k++)
-            {
-                q = out[k];
-                // If we aren't getting the correctly rounded result
-                if (-t[j] != q[j])
-                {
-                    if (ftz)
-                    {
-                        if (IsFloatSubnormal(s[j]))
-                        {
-                            int correct = -ref_func(+0.0f);
-                            int correct2 = -ref_func(-0.0f);
-                            if (correct == q[j] || correct2 == q[j]) continue;
-                        }
-                    }
-                    uint32_t err = -t[j] - q[j];
-                    if (q[j] > -t[j]) err = q[j] + t[j];
-                    vlog_error(
-                        "\nERROR: %s%s: %d ulp error at %a: *%d vs. %d\n", name,
-                        sizeNames[k], err, ((float *)s)[j], -t[j], q[j]);
-                    error = -1;
-                    goto exit;
-                }
-            }
-        }
-    }
-    ret = error;
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-    if ((error = clFlush(tinfo->tQueue)))
-    {
-        vlog("clFlush 3 failed\n");
-        return error;
-    }
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-    return ret;
diff --git a/test_conformance/math_brute_force/mad.cpp b/test_conformance/math_brute_force/mad.cpp
new file mode 100644
index 0000000..5eeae35
--- /dev/null
+++ b/test_conformance/math_brute_force/mad.cpp
@@ -0,0 +1,1128 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "Utility.h"
+#include <string.h>
+#include "FunctionList.h"
+int TestFunc_mad(const Func *f, MTdata);
+int TestFunc_mad_Double(const Func *f, MTdata);
+extern const vtbl _mad_tbl = { "ternary", TestFunc_mad, TestFunc_mad_Double };
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) // Assembles the single-precision kernel source for vector-size index vectorSize, calling function `name`; returns whatever MakeKernel returns.
+    const char *c[] = { // generic source: work-item i computes out[i] = name( in1[i], in2[i], in3[i] ) on floatN pointers
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2,  __global float", sizeNames[vectorSize], "* in3 )\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in1[i], in2[i], in3[i] );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global float* in2, __global float* in3)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       float3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
+                            "       float3 f2 = vload3( 0, in3 + 3 * i );\n"
+                            "       f0 = ", name, "( f0, f1, f2 );\n"
+                            "       vstore3( f0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       float3 f0, f1, f2;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+                            "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
+                            "               f2 = (float3)( in3[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
+                            "               f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       f0 = ", name, "( f0, f1, f2 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = f0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = f0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    const char **kern = c; // start from the generic source
+    size_t kernSize = sizeof(c)/sizeof(c[0]); // number of string fragments in the chosen source
+    if( sizeValues[vectorSize] == 3 ) // 3-wide vectors use c3 instead: vload3/vstore3 on scalar pointers, with the last work-item loading and storing its one or two leftover elements individually (unused lanes set to NAN)
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); // same "math_kernel" + sizeNames[vectorSize] name that the source fragments declare
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); // k and p are forwarded untouched; presumably MakeKernel fills them in -- confirm against its definition
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) // Double-precision counterpart of BuildKernel: same structure, double/doubleN types, source prefixed with the cl_khr_fp64 enable pragma; returns whatever MakeKernel returns.
+    const char *c[] = { // generic source: work-item i computes out[i] = name( in1[i], in2[i], in3[i] ) on doubleN pointers
+                            "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2,  __global double", sizeNames[vectorSize], "* in3 )\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in1[i], in2[i], in3[i] );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2, __global double* in3)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       double3 d0 = vload3( 0, in + 3 * i );\n"
+                            "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
+                            "       double3 d2 = vload3( 0, in3 + 3 * i );\n"
+                            "       d0 = ", name, "( d0, d1, d2 );\n"
+                            "       vstore3( d0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       double3 d0, d1, d2;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
+                            "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
+                            "               d2 = (double3)( in3[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
+                            "               d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       d0 = ", name, "( d0, d1, d2 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = d0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = d0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    const char **kern = c; // start from the generic source
+    size_t kernSize = sizeof(c)/sizeof(c[0]); // number of string fragments in the chosen source
+    if( sizeValues[vectorSize] == 3 ) // 3-wide vectors use c3 instead: vload3/vstore3 on scalar pointers, with the last work-item loading and storing its one or two leftover elements individually (unused lanes set to NAN)
+    {
+        kern = c3; // NOTE(review): the comment embedded in this source string still says sizeof(float) although the elements are double
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); // same "math_kernel" + sizeNames[vectorSize] name that the source fragments declare
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); // k and p are forwarded untouched; presumably MakeKernel fills them in -- confirm against its definition
+typedef struct BuildKernelInfo // argument block that the BuildKernel_FloatFn / BuildKernel_DoubleFn jobs receive through their void* parameter
+    cl_uint     offset;            // index of the first vector size to build; job n handles index offset + n
+    cl_kernel   *kernels; // array indexed by vector-size index; each job passes &kernels[offset + job_id] to the builder
+    cl_program  *programs; // array indexed the same way; each job passes &programs[offset + job_id] to the builder
+    const char  *nameInCode; // function name handed to the builder, which splices it into the generated kernel source
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); // forward declaration of the job defined on the next line
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) // ThreadPool_Do job: builds the single-precision kernel for one vector-size index; thread_id is not used
+    BuildKernelInfo *info = (BuildKernelInfo*) p; // p is the BuildKernelInfo the caller handed to ThreadPool_Do
+    cl_uint i = info->offset + job_id; // vector-size index this job is responsible for
+    return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); // slot i of both arrays goes to the builder; its result is returned unchanged
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); // forward declaration of the job defined on the next line
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) // same job signature as BuildKernel_FloatFn, building the double-precision kernel; presumably run via ThreadPool_Do by the double test -- caller not visible here
+    BuildKernelInfo *info = (BuildKernelInfo*) p; // p is expected to point at a BuildKernelInfo
+    cl_uint i = info->offset + job_id; // vector-size index this job is responsible for
+    return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); // slot i of both arrays goes to the builder; its result is returned unchanged
+int TestFunc_mad(const Func *f, MTdata d)
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+//    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    float maxErrorVal = 0.0f;
+    float maxErrorVal2 = 0.0f;
+    float maxErrorVal3 = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( float );
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+        return error;
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        uint32_t *p3 = (uint32_t *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            p[j] = genrand_int32(d);
+            p2[j] = genrand_int32(d);
+            p3[j] = genrand_int32(d);
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            return error;
+        }
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+        //Calculate the correctly rounded reference result
+        float *r = (float *)gOut_Ref;
+        float *s = (float *)gIn;
+        float *s2 = (float *)gIn2;
+        float *s3 = (float *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            r[j] = (float) f->func.f_fff( s[j], s2[j], s3[j] );
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+        if( gSkipCorrectnessTesting )
+            break;
+        //Verify data  -- Commented out on purpose. no verification possible. MAD is a random number generator.
+        uint32_t *t = gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = gOut[k];
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] )
+                {
+                    float test = ((float*) q)[j];
+                    double correct = f->func.f_fff( s[j], s2[j], s3[j] );
+                    float err = Ulp_Error( test, correct );
+                    int fail = ! (fabsf(err) <= f->float_ulps);
+                    if( fail && ftz )
+                    {
+                        // retry per section
+                        if( IsFloatSubnormal(correct) )
+                        { // look at me,
+                            fail = fail && ( test != 0.0f );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                        // retry per section
+                        if( fail && IsFloatSubnormal( s[j] ) )
+                        { // look at me,
+                            double correct2 = f->func.f_fff( 0.0, s2[j], s3[j] );
+                            double correct3 = f->func.f_fff( -0.0, s2[j], s3[j] );
+                            float err2 = Ulp_Error( test, correct2  );
+                            float err3 = Ulp_Error( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) )
+                            { // look at me now,
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                            //try with first two args as zero
+                            if( IsFloatSubnormal( s2[j] ) )
+                            { // its fun to have fun,
+                                correct2 = f->func.f_fff( 0.0, 0.0, s3[j] );
+                                correct3 = f->func.f_fff( -0.0, 0.0, s3[j] );
+                                double correct4 = f->func.f_fff( 0.0, -0.0, s3[j] );
+                                double correct5 = f->func.f_fff( -0.0, -0.0, s3[j] );
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                float err4 = Ulp_Error( test, correct4  );
+                                float err5 = Ulp_Error( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) &&
+                                                 (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                // retry per section
+                                if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) ||
+                                    IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                                if( IsFloatSubnormal( s3[j] )  )
+                                { // but you have to know how!
+                                    correct2 = f->func.f_fff( 0.0, 0.0, 0.0f );
+                                    correct3 = f->func.f_fff( -0.0, 0.0, 0.0f );
+                                    correct4 = f->func.f_fff( 0.0, -0.0, 0.0f );
+                                    correct5 = f->func.f_fff( -0.0, -0.0, 0.0f );
+                                    double correct6 = f->func.f_fff( 0.0, 0.0, -0.0f );
+                                    double correct7 = f->func.f_fff( -0.0, 0.0, -0.0f );
+                                    double correct8 = f->func.f_fff( 0.0, -0.0, -0.0f );
+                                    double correct9 = f->func.f_fff( -0.0, -0.0, -0.0f );
+                                    err2 = Ulp_Error( test, correct2  );
+                                    err3 = Ulp_Error( test, correct3  );
+                                    err4 = Ulp_Error( test, correct4  );
+                                    err5 = Ulp_Error( test, correct5  );
+                                    float err6 = Ulp_Error( test, correct6  );
+                                    float err7 = Ulp_Error( test, correct7  );
+                                    float err8 = Ulp_Error( test, correct8  );
+                                    float err9 = Ulp_Error( test, correct9  );
+                                    fail =  fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) &&
+                                                     (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps)) &&
+                                                     (!(fabsf(err5) <= f->float_ulps)) && (!(fabsf(err6) <= f->float_ulps)) &&
+                                                     (!(fabsf(err7) <= f->float_ulps)) && (!(fabsf(err8) <= f->float_ulps)));
+                                    if( fabsf( err2 ) < fabsf(err ) )
+                                        err = err2;
+                                    if( fabsf( err3 ) < fabsf(err ) )
+                                        err = err3;
+                                    if( fabsf( err4 ) < fabsf(err ) )
+                                        err = err4;
+                                    if( fabsf( err5 ) < fabsf(err ) )
+                                        err = err5;
+                                    if( fabsf( err6 ) < fabsf(err ) )
+                                        err = err6;
+                                    if( fabsf( err7 ) < fabsf(err ) )
+                                        err = err7;
+                                    if( fabsf( err8 ) < fabsf(err ) )
+                                        err = err8;
+                                    if( fabsf( err9 ) < fabsf(err ) )
+                                        err = err9;
+                                    // retry per section
+                                    if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps )  ||
+                                        IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps )  ||
+                                        IsFloatResultSubnormal( correct6, f->float_ulps ) || IsFloatResultSubnormal(correct7, f->float_ulps )  ||
+                                        IsFloatResultSubnormal(correct8, f->float_ulps ) || IsFloatResultSubnormal( correct9, f->float_ulps )  )
+                                    {
+                                        fail = fail && ( test != 0.0f);
+                                        if( ! fail )
+                                            err = 0.0f;
+                                    }
+                                }
+                            }
+                            else if( IsFloatSubnormal( s3[j] ) )
+                            {
+                                correct2 = f->func.f_fff( 0.0, s2[j], 0.0 );
+                                correct3 = f->func.f_fff( -0.0, s2[j], 0.0 );
+                                double correct4 = f->func.f_fff( 0.0,  s2[j], -0.0 );
+                                double correct5 = f->func.f_fff( -0.0, s2[j], -0.0 );
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                float err4 = Ulp_Error( test, correct4  );
+                                float err5 = Ulp_Error( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) &&
+                                                 (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                // retry per section
+                                if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps )  ||
+                                    IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsFloatSubnormal( s2[j] ) )
+                        {
+                            double correct2 = f->func.f_fff( s[j], 0.0, s3[j] );
+                            double correct3 = f->func.f_fff( s[j], -0.0, s3[j] );
+                            float err2 = Ulp_Error( test, correct2  );
+                            float err3 = Ulp_Error( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( IsFloatResultSubnormal(correct2, f->float_ulps )  || IsFloatResultSubnormal(correct3, f->float_ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                            //try with second two args as zero
+                            if( IsFloatSubnormal( s3[j] ) )
+                            {
+                                correct2 = f->func.f_fff( s[j], 0.0, 0.0 );
+                                correct3 = f->func.f_fff( s[j], -0.0, 0.0 );
+                                double correct4 = f->func.f_fff( s[j], 0.0, -0.0 );
+                                double correct5 = f->func.f_fff( s[j], -0.0, -0.0 );
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                float err4 = Ulp_Error( test, correct4  );
+                                float err5 = Ulp_Error( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) &&
+                                                 (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                // retry per section
+                                if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) ||
+                                    IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsFloatSubnormal(s3[j]) )
+                        {
+                            double correct2 = f->func.f_fff( s[j], s2[j], 0.0 );
+                            double correct3 = f->func.f_fff( s[j], s2[j], -0.0 );
+                            float err2 = Ulp_Error( test, correct2  );
+                            float err3 = Ulp_Error( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                        maxErrorVal2 = s2[j];
+                        maxErrorVal3 = s3[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a, %a}: *%a vs. %a\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((float*) gOut_Ref)[j], test );
+ error = -1;
+ goto exit;
+                    }
+                }
+            }
+        }
+        if( 0 == (i & 0x0fffffff) )
+        {
+            vlog("." );
+            fflush(stdout);
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "pass" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        uint32_t *p3 = (uint32_t *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            p[j] = genrand_int32(d);
+            p2[j] = genrand_int32(d);
+            p3[j] = genrand_int32(d);
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 );
+    vlog( "\n" );
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+    return error;
+int TestFunc_mad_Double(const Func *f, MTdata d)
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+//    int ftz = f->ftz || gForceFTZ;
+    double maxErrorVal = 0.0f;
+    double maxErrorVal2 = 0.0f;
+    double maxErrorVal3 = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    uint64_t step = bufferSize / sizeof( double );
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_DoubleFn,
+                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                &build_info ) ))
+    {
+        return error;
+    }
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        double *p2 = (double *)gIn2;
+        double *p3 = (double *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = DoubleFromUInt32(genrand_int32(d));
+            p3[j] = DoubleFromUInt32(genrand_int32(d));
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            return error;
+        }
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+        //Calculate the correctly rounded reference result
+        double *r = (double *)gOut_Ref;
+        double *s = (double *)gIn;
+        double *s2 = (double *)gIn2;
+        double *s3 = (double *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+            r[j] = (double) f->dfunc.f_fff( s[j], s2[j], s3[j] );
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+        if( gSkipCorrectnessTesting )
+            break;
+        //Verify data  -- Commented out on purpose. no verification possible. MAD is a random number generator.
+        uint64_t *t = gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint64_t *q = gOut[k];
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] )
+                {
+                    double test = ((double*) q)[j];
+                    long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
+                    int fail = ! (fabsf(err) <= f->double_ulps);
+                    if( fail && ftz )
+                    {
+                        // retry per section
+                        if( IsDoubleResultSubnormal(correct, f->double_ulps) )
+                        { // look at me,
+                            fail = fail && ( test != 0.0f );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                        // retry per section
+                        if( fail && IsDoubleSubnormal( s[j] ) )
+                        { // look at me,
+                            long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] );
+                            long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            { // look at me now,
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                            //try with first two args as zero
+                            if( IsDoubleSubnormal( s2[j] ) )
+                            { // its fun to have fun,
+                                correct2 = f->dfunc.f_fff( 0.0, 0.0, s3[j] );
+                                correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] );
+                                long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] );
+                                long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                // retry per section
+                                if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ||
+                                    IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                                if( IsDoubleSubnormal( s3[j] )  )
+                                { // but you have to know how!
+                                    correct2 = f->dfunc.f_fff( 0.0, 0.0, 0.0f );
+                                    correct3 = f->dfunc.f_fff( -0.0, 0.0, 0.0f );
+                                    correct4 = f->dfunc.f_fff( 0.0, -0.0, 0.0f );
+                                    correct5 = f->dfunc.f_fff( -0.0, -0.0, 0.0f );
+                                    long double correct6 = f->dfunc.f_fff( 0.0, 0.0, -0.0f );
+                                    long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f );
+                                    long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f );
+                                    long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f );
+                                    err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                    err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                    err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                    err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                    float err6 = Bruteforce_Ulp_Error_Double( test, correct6  );
+                                    float err7 = Bruteforce_Ulp_Error_Double( test, correct7  );
+                                    float err8 = Bruteforce_Ulp_Error_Double( test, correct8  );
+                                    float err9 = Bruteforce_Ulp_Error_Double( test, correct9  );
+                                    fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                     (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) &&
+                                                     (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) &&
+                                                     (!(fabsf(err7) <= f->double_ulps)) && (!(fabsf(err8) <= f->double_ulps)));
+                                    if( fabsf( err2 ) < fabsf(err ) )
+                                        err = err2;
+                                    if( fabsf( err3 ) < fabsf(err ) )
+                                        err = err3;
+                                    if( fabsf( err4 ) < fabsf(err ) )
+                                        err = err4;
+                                    if( fabsf( err5 ) < fabsf(err ) )
+                                        err = err5;
+                                    if( fabsf( err6 ) < fabsf(err ) )
+                                        err = err6;
+                                    if( fabsf( err7 ) < fabsf(err ) )
+                                        err = err7;
+                                    if( fabsf( err8 ) < fabsf(err ) )
+                                        err = err8;
+                                    if( fabsf( err9 ) < fabsf(err ) )
+                                        err = err9;
+                                    // retry per section
+                                    if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps )  ||
+                                        IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps )  ||
+                                        IsDoubleResultSubnormal( correct6, f->double_ulps ) || IsDoubleResultSubnormal( correct7, f->double_ulps )  ||
+                                        IsDoubleResultSubnormal( correct8, f->double_ulps ) || IsDoubleResultSubnormal( correct9, f->double_ulps )  )
+                                    {
+                                        fail = fail && ( test != 0.0f);
+                                        if( ! fail )
+                                            err = 0.0f;
+                                    }
+                                }
+                            }
+                            else if( IsDoubleSubnormal( s3[j] ) )
+                            {
+                                correct2 = f->dfunc.f_fff( 0.0, s2[j], 0.0 );
+                                correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 );
+                                long double correct4 = f->dfunc.f_fff( 0.0,  s2[j], -0.0 );
+                                long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                // retry per section
+                                if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps )  ||
+                                    IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsDoubleSubnormal( s2[j] ) )
+                        {
+                            long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] );
+                            long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps )  || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                            //try with second two args as zero
+                            if( IsDoubleSubnormal( s3[j] ) )
+                            {
+                                correct2 = f->dfunc.f_fff( s[j], 0.0, 0.0 );
+                                correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 );
+                                long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 );
+                                long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                // retry per section
+                                if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ||
+                                    IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsDoubleSubnormal(s3[j]) )
+                        {
+                            long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 );
+                            long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                        maxErrorVal2 = s2[j];
+                        maxErrorVal3 = s3[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %sD%s: %f ulp error at {%a, %a, %a}: *%a vs. %a\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((double*) gOut_Ref)[j], test );
+ error = -1;
+ goto exit;
+                    }
+                }
+            }
+        }
+        if( 0 == (i & 0x0fffffff) )
+        {
+            vlog("." );
+            fflush(stdout);
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "pass" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        double *p2 = (double *)gIn2;
+        double *p3 = (double *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = DoubleFromUInt32(genrand_int32(d));
+            p3[j] = DoubleFromUInt32(genrand_int32(d));
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 );
+    vlog( "\n" );
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+    return error;
diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp
deleted file mode 100644
index a32cd5a..0000000
--- a/test_conformance/math_brute_force/mad_double.cpp
+++ /dev/null
@@ -1,301 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2,  __global double",
-                        sizeNames[vectorSize],
-                        "* in3 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], in3[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global double* in2, "
-        "__global double* in3)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
-        "       double3 d2 = vload3( 0, in3 + 3 * i );\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, d2 );\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       double3 d1;\n"
-        "       double3 d2;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               d2 = (double3)( in3[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, d2 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0f;
-    double maxErrorVal2 = 0.0f;
-    double maxErrorVal3 = 0.0f;
-    uint64_t step = getTestStep(sizeof(double), BUFFER_SIZE);
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        double *p = (double *)gIn;
-        double *p2 = (double *)gIn2;
-        double *p3 = (double *)gIn3;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-        {
-            p[j] = DoubleFromUInt32(genrand_int32(d));
-            p2[j] = DoubleFromUInt32(genrand_int32(d));
-            p3[j] = DoubleFromUInt32(genrand_int32(d));
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
-            return error;
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn3, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
-            return error;
-        }
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeof(cl_double) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
-                                        &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-        // Calculate the correctly rounded reference result
-        double *r = (double *)gOut_Ref;
-        double *s = (double *)gIn;
-        double *s2 = (double *)gIn2;
-        double *s3 = (double *)gIn3;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-            r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-        if (gSkipCorrectnessTesting) break;
-        // Verify data -- No verification possible.
-        // MAD is a random number generator.
-        if (0 == (i & 0x0fffffff))
-        {
-            vlog(".");
-            fflush(stdout);
-        }
-    }
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
-             maxErrorVal3);
-    }
-    vlog("\n");
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp
deleted file mode 100644
index 095a22f..0000000
--- a/test_conformance/math_brute_force/mad_float.cpp
+++ /dev/null
@@ -1,300 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2,  __global float",
-                        sizeNames[vectorSize],
-                        "* in3 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], in3[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global float* in2, "
-        "__global float* in3)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       float3 f2 = vload3( 0, in3 + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, f2 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       float3 f2;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               f2 = (float3)( in3[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, f2 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
-    int error;
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    float maxErrorVal = 0.0f;
-    float maxErrorVal2 = 0.0f;
-    float maxErrorVal3 = 0.0f;
-    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        cl_uint *p = (cl_uint *)gIn;
-        cl_uint *p2 = (cl_uint *)gIn2;
-        cl_uint *p3 = (cl_uint *)gIn3;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            p[j] = genrand_int32(d);
-            p2[j] = genrand_int32(d);
-            p3[j] = genrand_int32(d);
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
-            return error;
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn3, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
-            return error;
-        }
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
-                                        &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-        // Calculate the correctly rounded reference result
-        float *r = (float *)gOut_Ref;
-        float *s = (float *)gIn;
-        float *s2 = (float *)gIn2;
-        float *s3 = (float *)gIn3;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            r[j] = (float)f->func.f_fff(s[j], s2[j], s3[j]);
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-        if (gSkipCorrectnessTesting) break;
-        // Verify data -- No verification possible.
-        // MAD is a random number generator.
-        if (0 == (i & 0x0fffffff))
-        {
-            vlog(".");
-            fflush(stdout);
-        }
-    }
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
-             maxErrorVal3);
-    }
-    vlog("\n");
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp
index d6c2f11..1e33b95 100644
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,125 +13,128 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "function_list.h"
-#include "sleep.h"
-#include "utility.h"
+#include "Utility.h"
 #include <cstdio>
 #include <cstdlib>
-#include <ctime>
 #include <string>
-#include <vector>
+#include <time.h>
+#include "FunctionList.h"
+#include "Sleep.h"
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
 #include "harness/parseParameters.h"
-#include "harness/typeWrappers.h"
-#if defined(__APPLE__)
-#include <sys/sysctl.h>
-#include <sys/mman.h>
-#include <libgen.h>
-#include <sys/time.h>
-#elif defined(__linux__)
-#include <unistd.h>
-#include <sys/syscall.h>
-#include <linux/sysctl.h>
-#include <sys/param.h>
+#if defined( __APPLE__ )
+    #include <sys/sysctl.h>
+    #include <sys/mman.h>
+    #include <libgen.h>
+    #include <sys/time.h>
+#elif defined( __linux__ )
+    #include <unistd.h>
+    #include <sys/syscall.h>
+    #include <linux/sysctl.h>
+    #include <sys/param.h>
-#if defined(__linux__) || (defined WIN32 && defined __MINGW32__)
+#if defined (__linux__) || (defined WIN32 && defined __MINGW32__)
 #include <sys/param.h>
 #include "harness/testHarness.h"
-#define kPageSize 4096
-#define DOUBLE_REQUIRED_FEATURES                                               \
-    (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO                  \
+#define kPageSize           4096
-static std::vector<const char *> gTestNames;
-static char appName[MAXPATHLEN] = "";
-cl_device_id gDevice = NULL;
-cl_context gContext = NULL;
+const char      **gTestNames = NULL;
+unsigned int    gTestNameCount = 0;
+char            appName[ MAXPATHLEN ] = "";
+cl_device_id    gDevice = NULL;
+cl_context      gContext = NULL;
 cl_command_queue gQueue = NULL;
-static int32_t gStartTestNumber = -1;
-static int32_t gEndTestNumber = -1;
-int gSkipCorrectnessTesting = 0;
-static int gStopOnError = 0;
-static bool gSkipRestOfTests;
-int gForceFTZ = 0;
-int gWimpyMode = 0;
-static int gHasDouble = 0;
-static int gTestFloat = 1;
-// This flag should be 'ON' by default and it can be changed through the command
-// line arguments.
-static int gTestFastRelaxed = 1;
-/*This flag corresponds to defining if the implementation has Derived Fast
-  Relaxed functions. The spec does not specify ULP for derived function.  The
-  derived functions are composed of base functions which are tested for ULP,
-  thus when this flag is enabled, Derived functions will not be tested for ULP,
-  as per table 7.1 of OpenCL 2.0 spec. Since there is no way of quering the
-  device whether it is a derived or non-derived implementation according to
-  OpenCL 2.0 spec then it has to be changed through a command line argument.
+static int32_t  gStartTestNumber;
+static int32_t  gEndTestNumber;
+int             gSkipCorrectnessTesting = 0;
+int             gStopOnError = 0;
+static bool     gSkipRestOfTests;
+#if defined( __APPLE__ )
+int             gMeasureTimes = 1;
+int             gMeasureTimes = 0;
+int             gReportAverageTimes = 0;
+int             gForceFTZ = 0;
+int             gWimpyMode = 0;
+int             gHasDouble = 0;
+int             gTestFloat = 1;
+//This flag should be 'ON' by default and it can be changed through the command line arguments.
+volatile int             gTestFastRelaxed = 1;
+/*This flag corresponds to defining if the implementation has Derived Fast Relaxed functions.
+  The spec does not specify ULP for derived function.  The derived functions are composed of base functions which are tested for ULP, thus when this flag is enabled,
+  Derived functions will not be tested for ULP, as per table 7.1 of OpenCL 2.0 spec.
+  Since there is no way of querying the device whether it is a derived or non-derived implementation according to the OpenCL 2.0 spec, it has to be changed through a command line argument.
-int gFastRelaxedDerived = 1;
-static int gToggleCorrectlyRoundedDivideSqrt = 0;
-int gDeviceILogb0 = 1;
-int gDeviceILogbNaN = 1;
-int gCheckTininessBeforeRounding = 1;
-int gIsInRTZMode = 0;
-uint32_t gMaxVectorSizeIndex = VECTOR_SIZE_COUNT;
-uint32_t gMinVectorSizeIndex = 0;
-void *gIn = NULL;
-void *gIn2 = NULL;
-void *gIn3 = NULL;
-void *gOut_Ref = NULL;
-void *gOut_Ref2 = NULL;
-cl_mem gInBuffer = NULL;
-cl_mem gInBuffer2 = NULL;
-cl_mem gInBuffer3 = NULL;
-static MTdata gMTdata;
+int             gFastRelaxedDerived = 1;
+int             gToggleCorrectlyRoundedDivideSqrt = 0;
+int             gDeviceILogb0 = 1;
+int             gDeviceILogbNaN = 1;
+int             gCheckTininessBeforeRounding = 1;
+int             gIsInRTZMode = 0;
+uint32_t        gMaxVectorSizeIndex = VECTOR_SIZE_COUNT;
+uint32_t        gMinVectorSizeIndex = 0;
+const char      *method[] = { "Best", "Average" };
+void            *gIn = NULL;
+void            *gIn2 = NULL;
+void            *gIn3 = NULL;
+void            *gOut_Ref = NULL;
+void            *gOut[VECTOR_SIZE_COUNT] = {NULL, NULL, NULL, NULL, NULL, NULL };
+void            *gOut_Ref2 = NULL;
+void            *gOut2[VECTOR_SIZE_COUNT] = {NULL, NULL, NULL, NULL, NULL, NULL };
+cl_mem          gInBuffer = NULL;
+cl_mem          gInBuffer2 = NULL;
+cl_mem          gInBuffer3 = NULL;
+cl_mem          gOutBuffer[VECTOR_SIZE_COUNT]= {NULL, NULL, NULL, NULL, NULL, NULL };
+cl_mem          gOutBuffer2[VECTOR_SIZE_COUNT]= {NULL, NULL, NULL, NULL, NULL, NULL };
+uint32_t        gComputeDevices = 0;
+uint32_t        gSimdSize = 1;
+uint32_t        gDeviceFrequency = 0;
+static MTdata   gMTdata;
 cl_device_fp_config gFloatCapabilities = 0;
-int gWimpyReductionFactor = 32;
-int gVerboseBruteForce = 0;
+cl_device_fp_config gDoubleCapabilities = 0;
+int             gWimpyReductionFactor = 32;
+int             gWimpyBufferSize = BUFFER_SIZE;
+int             gVerboseBruteForce = 0;
-static int ParseArgs(int argc, const char **argv);
-static void PrintUsage(void);
-static void PrintFunctions(void);
-static test_status InitCL(cl_device_id device);
-static void ReleaseCL(void);
-static int InitILogbConstants(void);
-static int IsTininessDetectedBeforeRounding(void);
-static int
-IsInRTZMode(void); // expensive. Please check gIsInRTZMode global instead.
+static int ParseArgs( int argc, const char **argv );
+static void PrintUsage( void );
+static void PrintFunctions( void );
+test_status InitCL( cl_device_id device );
+static void ReleaseCL( void );
+static int InitILogbConstants( void );
+static int IsTininessDetectedBeforeRounding( void );
+static int IsInRTZMode( void );         //expensive. Please check gIsInRTZMode global instead.
-static int doTest(const char *name)
+int doTest( const char* name )
-    if (gSkipRestOfTests)
+    if( gSkipRestOfTests )
-        vlog("Skipping function because of an earlier error.\n");
+        vlog( "Skipping function because of an earlier error.\n" );
         return 1;
     int error = 0;
-    const Func *func_data = NULL;
+    const Func* func_data = NULL;
-    for (size_t i = 0; i < functionListCount; i++)
+    for( size_t i = 0; i < functionListCount; i++ )
-        const Func *const temp_func = functionList + i;
-        if (strcmp(temp_func->name, name) == 0)
+        const Func* const temp_func = functionList + i;
+        if( strcmp( temp_func->name, name ) == 0 )
-            if ((gStartTestNumber != -1 && i < gStartTestNumber)
-                || i > gEndTestNumber)
+            if( i < gStartTestNumber || i > gEndTestNumber )
-                vlog("Skipping function #%d\n", i);
+                vlog( "Skipping function #%d\n", i );
                 return 0;
@@ -140,164 +143,613 @@
-    if (func_data == NULL)
+    if( func_data == NULL )
-        vlog("Function '%s' doesn't exist!\n", name);
-        exit(EXIT_FAILURE);
+        vlog( "Function '%s' doesn't exist!\n", name );
+        exit( EXIT_FAILURE );
-    if (func_data->func.p == NULL)
+    if( func_data->func.p == NULL )
-        vlog("'%s' is missing implementation, skipping function.\n",
-             func_data->name);
+        vlog( "'%s' is missing implementation, skipping function.\n", func_data->name );
         return 0;
     // if correctly rounded divide & sqrt are supported by the implementation
     // then test it; otherwise skip the test
-    if (strcmp(func_data->name, "sqrt_cr") == 0
-        || strcmp(func_data->name, "divide_cr") == 0)
+    if( strcmp( func_data->name, "sqrt_cr" ) == 0 || strcmp( func_data->name, "divide_cr" ) == 0 )
-        if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0)
+        if( ( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ) == 0 )
-            vlog("Correctly rounded divide and sqrt are not supported, "
-                 "skipping function.\n");
+            vlog( "Correctly rounded divide and sqrt are not supported, skipping function.\n" );
             return 0;
         extern int my_ilogb(double);
-        if (0 == strcmp("ilogb", func_data->name))
+        if( 0 == strcmp( "ilogb", func_data->name ) )
-        if (gTestFastRelaxed && func_data->relaxed)
+        if ( gTestFastRelaxed )
-            if (get_device_cl_version(gDevice) > Version(1, 2))
+            if( func_data->relaxed )
-                vlog("%3d: ", gTestCount);
-                // Test with relaxed requirements here.
-                if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
-                                                  true /* relaxed mode */))
+                vlog( "%3d: ", gTestCount );
+                if( func_data->vtbl_ptr->TestFunc( func_data, gMTdata )  )
-                    if (gStopOnError)
+                    if( gStopOnError )
                         gSkipRestOfTests = true;
                         return error;
-            else
-            {
-                vlog("Skipping reduced precision testing for device with "
-                     "version 1.2 or less\n");
-            }
-        if (gTestFloat)
+        if( gTestFloat )
+            int testFastRelaxedTmp = gTestFastRelaxed;
+            gTestFastRelaxed = 0;
-            vlog("%3d: ", gTestCount);
-            // Don't test with relaxed requirements.
-            if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
-                                              false /* relaxed mode */))
+            vlog( "%3d: ", gTestCount );
+            if( func_data->vtbl_ptr->TestFunc( func_data, gMTdata )  )
-                if (gStopOnError)
+                if( gStopOnError )
+                    gTestFastRelaxed = testFastRelaxedTmp;
                     gSkipRestOfTests = true;
                     return error;
+            gTestFastRelaxed = testFastRelaxedTmp;
-        if (gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc
-            && NULL != func_data->dfunc.p)
+        if( gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc && NULL != func_data->dfunc.p )
+            //Disable fast-relaxed-math for double precision floating-point
+            int testFastRelaxedTmp = gTestFastRelaxed;
+            gTestFastRelaxed = 0;
-            vlog("%3d: ", gTestCount);
-            // Don't test with relaxed requirements.
-            if (func_data->vtbl_ptr->DoubleTestFunc(func_data, gMTdata,
-                                                    false /* relaxed mode*/))
+            vlog( "%3d: ", gTestCount );
+            if( func_data->vtbl_ptr->DoubleTestFunc( func_data, gMTdata )  )
-                if (gStopOnError)
+                if( gStopOnError )
+                    gTestFastRelaxed = testFastRelaxedTmp;
                     gSkipRestOfTests = true;
                     return error;
+            //Re-enable testing fast-relaxed-math mode
+            gTestFastRelaxed = testFastRelaxedTmp;
     return error;
+int test_acos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "acos" );
+int test_acosh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "acosh" );
+int test_acospi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "acospi" );
+int test_asin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "asin" );
+int test_asinh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "asinh" );
+int test_asinpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "asinpi" );
+int test_atan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "atan" );
+int test_atanh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "atanh" );
+int test_atanpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "atanpi" );
+int test_atan2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "atan2" );
+int test_atan2pi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "atan2pi" );
+int test_cbrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "cbrt" );
+int test_ceil( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "ceil" );
+int test_copysign( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "copysign" );
+int test_cos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "cos" );
+int test_cosh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "cosh" );
+int test_cospi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "cospi" );
+int test_exp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "exp" );
+int test_exp2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "exp2" );
+int test_exp10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "exp10" );
+int test_expm1( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "expm1" );
+int test_fabs( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "fabs" );
+int test_fdim( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "fdim" );
+int test_floor( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "floor" );
+int test_fma( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "fma" );
+int test_fmax( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "fmax" );
+int test_fmin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "fmin" );
+int test_fmod( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "fmod" );
+int test_fract( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "fract" );
+int test_frexp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "frexp" );
+int test_hypot( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "hypot" );
+int test_ilogb( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "ilogb" );
+int test_isequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "isequal" );
+int test_isfinite( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "isfinite" );
+int test_isgreater( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "isgreater" );
+int test_isgreaterequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "isgreaterequal" );
+int test_isinf( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "isinf" );
+int test_isless( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "isless" );
+int test_islessequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "islessequal" );
+int test_islessgreater( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "islessgreater" );
+int test_isnan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "isnan" );
+int test_isnormal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "isnormal" );
+int test_isnotequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "isnotequal" );
+int test_isordered( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "isordered" );
+int test_isunordered( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "isunordered" );
+int test_ldexp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "ldexp" );
+int test_lgamma( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "lgamma" );
+int test_lgamma_r( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "lgamma_r" );
+int test_log( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "log" );
+int test_log2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "log2" );
+int test_log10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "log10" );
+int test_log1p( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "log1p" );
+int test_logb( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "logb" );
+int test_mad( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "mad" );
+int test_maxmag( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "maxmag" );
+int test_minmag( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "minmag" );
+int test_modf( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "modf" );
+int test_nan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "nan" );
+int test_nextafter( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "nextafter" );
+int test_pow( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "pow" );
+int test_pown( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "pown" );
+int test_powr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "powr" );
+int test_remainder( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "remainder" );
+int test_remquo( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "remquo" );
+int test_rint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "rint" );
+int test_rootn( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "rootn" );
+int test_round( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "round" );
+int test_rsqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "rsqrt" );
+int test_signbit( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "signbit" );
+int test_sin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "sin" );
+int test_sincos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "sincos" );
+int test_sinh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "sinh" );
+int test_sinpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "sinpi" );
+int test_sqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "sqrt" );
+int test_sqrt_cr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "sqrt_cr" );
+int test_tan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "tan" );
+int test_tanh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "tanh" );
+int test_tanpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "tanpi" );
+int test_trunc( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "trunc" );
+int test_half_cos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "half_cos" );
+int test_half_divide( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "half_divide" );
+int test_half_exp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "half_exp" );
+int test_half_exp2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "half_exp2" );
+int test_half_exp10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "half_exp10" );
+int test_half_log( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "half_log" );
+int test_half_log2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "half_log2" );
+int test_half_log10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "half_log10" );
+int test_half_powr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "half_powr" );
+int test_half_recip( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "half_recip" );
+int test_half_rsqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "half_rsqrt" );
+int test_half_sin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "half_sin" );
+int test_half_sqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "half_sqrt" );
+int test_half_tan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "half_tan" );
+int test_add( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "add" );
+int test_subtract( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "subtract" );
+int test_divide( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "divide" );
+int test_divide_cr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "divide_cr" );
+int test_multiply( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "multiply" );
+int test_assignment( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "assignment" );
+int test_not( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+    return doTest( "not" );
-#define TEST_LAMBDA(name)                                                      \
-    [](cl_device_id, cl_context, cl_command_queue, int) {                      \
-        return doTest(#name);                                                  \
-    }
-// Redefine ADD_TEST to use TEST_LAMBDA.
-#undef ADD_TEST
-#define ADD_TEST(name)                                                         \
-    {                                                                          \
-        TEST_LAMBDA(name), #name, Version(1, 0)                                \
-    }
-static test_definition test_list[] = {
-    ADD_TEST(acos),          ADD_TEST(acosh),      ADD_TEST(acospi),
-    ADD_TEST(asin),          ADD_TEST(asinh),      ADD_TEST(asinpi),
-    ADD_TEST(atan),          ADD_TEST(atanh),      ADD_TEST(atanpi),
-    ADD_TEST(atan2),         ADD_TEST(atan2pi),    ADD_TEST(cbrt),
-    ADD_TEST(ceil),          ADD_TEST(copysign),   ADD_TEST(cos),
-    ADD_TEST(cosh),          ADD_TEST(cospi),      ADD_TEST(exp),
-    ADD_TEST(exp2),          ADD_TEST(exp10),      ADD_TEST(expm1),
-    ADD_TEST(fabs),          ADD_TEST(fdim),       ADD_TEST(floor),
-    ADD_TEST(fma),           ADD_TEST(fmax),       ADD_TEST(fmin),
-    ADD_TEST(fmod),          ADD_TEST(fract),      ADD_TEST(frexp),
-    ADD_TEST(hypot),         ADD_TEST(ilogb),      ADD_TEST(isequal),
-    ADD_TEST(isfinite),      ADD_TEST(isgreater),  ADD_TEST(isgreaterequal),
-    ADD_TEST(isinf),         ADD_TEST(isless),     ADD_TEST(islessequal),
-    ADD_TEST(islessgreater), ADD_TEST(isnan),      ADD_TEST(isnormal),
-    ADD_TEST(isnotequal),    ADD_TEST(isordered),  ADD_TEST(isunordered),
-    ADD_TEST(ldexp),         ADD_TEST(lgamma),     ADD_TEST(lgamma_r),
-    ADD_TEST(log),           ADD_TEST(log2),       ADD_TEST(log10),
-    ADD_TEST(log1p),         ADD_TEST(logb),       ADD_TEST(mad),
-    ADD_TEST(maxmag),        ADD_TEST(minmag),     ADD_TEST(modf),
-    ADD_TEST(nan),           ADD_TEST(nextafter),  ADD_TEST(pow),
-    ADD_TEST(pown),          ADD_TEST(powr),       ADD_TEST(remainder),
-    ADD_TEST(remquo),        ADD_TEST(rint),       ADD_TEST(rootn),
-    ADD_TEST(round),         ADD_TEST(rsqrt),      ADD_TEST(signbit),
-    ADD_TEST(sin),           ADD_TEST(sincos),     ADD_TEST(sinh),
-    ADD_TEST(sinpi),         ADD_TEST(sqrt),       ADD_TEST(sqrt_cr),
-    ADD_TEST(tan),           ADD_TEST(tanh),       ADD_TEST(tanpi),
-    ADD_TEST(trunc),         ADD_TEST(half_cos),   ADD_TEST(half_divide),
-    ADD_TEST(half_exp),      ADD_TEST(half_exp2),  ADD_TEST(half_exp10),
-    ADD_TEST(half_log),      ADD_TEST(half_log2),  ADD_TEST(half_log10),
-    ADD_TEST(half_powr),     ADD_TEST(half_recip), ADD_TEST(half_rsqrt),
-    ADD_TEST(half_sin),      ADD_TEST(half_sqrt),  ADD_TEST(half_tan),
-    ADD_TEST(add),           ADD_TEST(subtract),   ADD_TEST(divide),
-    ADD_TEST(divide_cr),     ADD_TEST(multiply),   ADD_TEST(assignment),
-    ADD_TEST(not),
+test_definition test_list[] = {
+    ADD_TEST( acos ),
+    ADD_TEST( acosh ),
+    ADD_TEST( acospi ),
+    ADD_TEST( asin ),
+    ADD_TEST( asinh ),
+    ADD_TEST( asinpi ),
+    ADD_TEST( atan ),
+    ADD_TEST( atanh ),
+    ADD_TEST( atanpi ),
+    ADD_TEST( atan2 ),
+    ADD_TEST( atan2pi ),
+    ADD_TEST( cbrt ),
+    ADD_TEST( ceil ),
+    ADD_TEST( copysign ),
+    ADD_TEST( cos ),
+    ADD_TEST( cosh ),
+    ADD_TEST( cospi ),
+    ADD_TEST( exp ),
+    ADD_TEST( exp2 ),
+    ADD_TEST( exp10 ),
+    ADD_TEST( expm1 ),
+    ADD_TEST( fabs ),
+    ADD_TEST( fdim ),
+    ADD_TEST( floor ),
+    ADD_TEST( fma ),
+    ADD_TEST( fmax ),
+    ADD_TEST( fmin ),
+    ADD_TEST( fmod ),
+    ADD_TEST( fract ),
+    ADD_TEST( frexp ),
+    ADD_TEST( hypot ),
+    ADD_TEST( ilogb ),
+    ADD_TEST( isequal ),
+    ADD_TEST( isfinite ),
+    ADD_TEST( isgreater ),
+    ADD_TEST( isgreaterequal ),
+    ADD_TEST( isinf ),
+    ADD_TEST( isless ),
+    ADD_TEST( islessequal ),
+    ADD_TEST( islessgreater ),
+    ADD_TEST( isnan ),
+    ADD_TEST( isnormal ),
+    ADD_TEST( isnotequal ),
+    ADD_TEST( isordered ),
+    ADD_TEST( isunordered ),
+    ADD_TEST( ldexp ),
+    ADD_TEST( lgamma ),
+    ADD_TEST( lgamma_r ),
+    ADD_TEST( log ),
+    ADD_TEST( log2 ),
+    ADD_TEST( log10 ),
+    ADD_TEST( log1p ),
+    ADD_TEST( logb ),
+    ADD_TEST( mad ),
+    ADD_TEST( maxmag ),
+    ADD_TEST( minmag ),
+    ADD_TEST( modf ),
+    ADD_TEST( nan ),
+    ADD_TEST( nextafter ),
+    ADD_TEST( pow ),
+    ADD_TEST( pown ),
+    ADD_TEST( powr ),
+    ADD_TEST( remainder ),
+    ADD_TEST( remquo ),
+    ADD_TEST( rint ),
+    ADD_TEST( rootn ),
+    ADD_TEST( round ),
+    ADD_TEST( rsqrt ),
+    ADD_TEST( signbit ),
+    ADD_TEST( sin ),
+    ADD_TEST( sincos ),
+    ADD_TEST( sinh ),
+    ADD_TEST( sinpi ),
+    ADD_TEST( sqrt ),
+    ADD_TEST( sqrt_cr ),
+    ADD_TEST( tan ),
+    ADD_TEST( tanh ),
+    ADD_TEST( tanpi ),
+    ADD_TEST( trunc ),
+    ADD_TEST( half_cos ),
+    ADD_TEST( half_divide ),
+    ADD_TEST( half_exp ),
+    ADD_TEST( half_exp2 ),
+    ADD_TEST( half_exp10 ),
+    ADD_TEST( half_log ),
+    ADD_TEST( half_log2 ),
+    ADD_TEST( half_log10 ),
+    ADD_TEST( half_powr ),
+    ADD_TEST( half_recip ),
+    ADD_TEST( half_rsqrt ),
+    ADD_TEST( half_sin ),
+    ADD_TEST( half_sqrt ),
+    ADD_TEST( half_tan ),
+    ADD_TEST( add ),
+    ADD_TEST( subtract ),
+    ADD_TEST( divide ),
+    ADD_TEST( divide_cr ),
+    ADD_TEST( multiply ),
+    ADD_TEST( assignment ),
+    ADD_TEST( not ),
-#undef ADD_TEST
-static const int test_num = ARRAY_SIZE(test_list);
+const int test_num = ARRAY_SIZE( test_list );
 #pragma mark -
-int main(int argc, const char *argv[])
+int main (int argc, const char * argv[])
     int error;
@@ -307,113 +759,180 @@
         return -1;
-    error = ParseArgs(argc, argv);
-    if (error) return error;
+#if defined( __APPLE__ )
+    struct timeval startTime;
+    gettimeofday( &startTime, NULL );
+    error = ParseArgs( argc, argv );
+    if( error )
+        return error;
     // This takes a while, so prevent the machine from going to sleep.
-    atexit(ResumeSleep);
+    atexit( ResumeSleep );
-    if (gSkipCorrectnessTesting)
-        vlog("*** Skipping correctness testing! ***\n\n");
-    else if (gStopOnError)
-        vlog("Stopping at first error.\n");
+    if( gSkipCorrectnessTesting )
+        vlog( "*** Skipping correctness testing! ***\n\n" );
+    else if( gStopOnError )
+        vlog( "Stopping at first error.\n" );
-    vlog("   \t                                        ");
-    if (gWimpyMode) vlog("   ");
-    if (!gSkipCorrectnessTesting) vlog("\t  max_ulps");
-    vlog("\n-------------------------------------------------------------------"
-         "----------------------------------------\n");
-    gMTdata = init_genrand(gRandomSeed);
-    FPU_mode_type oldMode;
-    DisableFTZ(&oldMode);
-    int ret = runTestHarnessWithCheck(gTestNames.size(),,
-                                      test_num, test_list, true, 0, InitCL);
-    RestoreFPState(&oldMode);
-    free_mtdata(gMTdata);
-    if (gQueue)
+    if( gMeasureTimes )
-        int error_code = clFinish(gQueue);
-        if (error_code) vlog_error("clFinish failed:%d\n", error_code);
+        vlog( "%s times are reported at right (cycles per element):\n", method[gReportAverageTimes] );
+        vlog( "\n" );
+        if( gSkipCorrectnessTesting )
+            vlog( "   \t               ");
+        else
+            vlog( "   \t                                        ");
+        if( gWimpyMode )
+            vlog( "   " );
+        for( int i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+            vlog( "\t  float%s", sizeNames[i] );
+    }
+    else
+    {
+        vlog( "   \t                                        ");
+        if( gWimpyMode )
+            vlog( "   " );
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t  max_ulps" );
+    vlog( "\n-----------------------------------------------------------------------------------------------------------\n" );
+    gMTdata = init_genrand( gRandomSeed );
+    if( gEndTestNumber == 0 )
+    {
+        gEndTestNumber = functionListCount;
+    FPU_mode_type oldMode;
+    DisableFTZ( &oldMode );
+    int ret = runTestHarnessWithCheck( gTestNameCount, gTestNames, test_num, test_list, true, 0, InitCL );
+    RestoreFPState( &oldMode );
+    free_mtdata(gMTdata);
+    free(gTestNames);
+    int error_code = clFinish(gQueue);
+    if (error_code)
+        vlog_error("clFinish failed:%d\n", error_code);
+#if defined( __APPLE__ )
+    struct timeval endTime;
+    gettimeofday( &endTime, NULL );
+    double time = (double) endTime.tv_sec - (double) startTime.tv_sec;
+    time += 1e-6 * ((double) endTime.tv_usec - (double) startTime.tv_usec);
+    vlog( "time: %f s\n", time );
     return ret;
-static int ParseArgs(int argc, const char **argv)
+static int ParseArgs( int argc, const char **argv )
-    // We only pass test names to runTestHarnessWithCheck, hence global command
-    // line options defined by the harness cannot be used by the user.
-    // To respect the implementation details of runTestHarnessWithCheck,
-    // gTestNames[0] has to exist although its value is not important.
-    gTestNames.push_back("");
+    int i;
+    gTestNames = (const char**) calloc( argc - 1, sizeof( char*) );
+    if( NULL == gTestNames )
+    {
+        vlog( "Failed to allocate memory for gTestNames array.\n" );
+        return 1;
+    }
+    gTestNames[0] = argv[0];
+    gTestNameCount = 1;
     int singleThreaded = 0;
     { // Extract the app name
-        strncpy(appName, argv[0], MAXPATHLEN);
+        strncpy( appName, argv[0], MAXPATHLEN );
-#if defined(__APPLE__)
+#if defined( __APPLE__ )
         char baseName[MAXPATHLEN];
         char *base = NULL;
-        strncpy(baseName, argv[0], MAXPATHLEN);
-        base = basename(baseName);
-        if (NULL != base)
+        strncpy( baseName, argv[0], MAXPATHLEN );
+        base = basename( baseName );
+        if( NULL != base )
-            strncpy(appName, base, sizeof(appName));
-            appName[sizeof(appName) - 1] = '\0';
+            strncpy( appName, base, sizeof( appName )  );
+            appName[ sizeof( appName ) -1 ] = '\0';
-    vlog("\n%s\t", appName);
-    for (int i = 1; i < argc; i++)
+    vlog( "\n%s\t", appName );
+    for( i = 1; i < argc; i++ )
         const char *arg = argv[i];
-        if (NULL == arg) break;
+        if( NULL == arg )
+            break;
-        vlog("\t%s", arg);
+        vlog( "\t%s", arg );
         int optionFound = 0;
-        if (arg[0] == '-')
+        if( arg[0] == '-' )
-            while (arg[1] != '\0')
+            while( arg[1] != '\0' )
                 optionFound = 1;
-                switch (*arg)
+                switch( *arg )
-                    case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break;
+                    case 'a':
+                        gReportAverageTimes ^= 1;
+                        break;
-                    case 'd': gHasDouble ^= 1; break;
+                    case 'c':
+                        gToggleCorrectlyRoundedDivideSqrt ^= 1;
+                        break;
-                    case 'e': gFastRelaxedDerived ^= 1; break;
+                    case 'd':
+                        gHasDouble ^= 1;
+                        break;
-                    case 'f': gTestFloat ^= 1; break;
+                    case 'e':
+                        gFastRelaxedDerived ^= 1;
+                        break;
-                    case 'h': PrintUsage(); return -1;
+                    case 'f':
+                        gTestFloat ^= 1;
+                        break;
-                    case 'p': PrintFunctions(); return -1;
+                    case 'h':
+                        PrintUsage();
+                        return -1;
-                    case 'l': gSkipCorrectnessTesting ^= 1; break;
+                    case 'p':
+                      PrintFunctions();
+                      return -1;
-                    case 'm': singleThreaded ^= 1; break;
+                    case 'l':
+                        gSkipCorrectnessTesting ^= 1;
+                        break;
-                    case 'r': gTestFastRelaxed ^= 1; break;
+                    case 'm':
+                        singleThreaded ^= 1;
+                        break;
-                    case 's': gStopOnError ^= 1; break;
+                    case 'r':
+                        gTestFastRelaxed ^= 1;
+                        break;
-                    case 'v': gVerboseBruteForce ^= 1; break;
+                    case 's':
+                        gStopOnError ^= 1;
+                        break;
-                    case 'w': // wimpy mode
+                    case 't':
+                        gMeasureTimes ^= 1;
+                        break;
+                    case 'v':
+                        gVerboseBruteForce ^= 1;
+                        break;
+                    case 'w':   // wimpy mode
                         gWimpyMode ^= 1;
@@ -421,10 +940,12 @@
                         parseWimpyReductionFactor(arg, gWimpyReductionFactor);
-                    case 'z': gForceFTZ ^= 1; break;
+                    case 'z':
+                        gForceFTZ ^= 1;
+                        break;
                     case '1':
-                        if (arg[1] == '6')
+                        if( arg[1] == '6' )
                             gMinVectorSizeIndex = 5;
                             gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
@@ -437,194 +958,181 @@
                     case '2':
-                        gMinVectorSizeIndex = 1;
-                        gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
-                        break;
+                            gMinVectorSizeIndex = 1;
+                            gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
+                            break;
                     case '3':
-                        gMinVectorSizeIndex = 2;
-                        gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
-                        break;
+                            gMinVectorSizeIndex = 2;
+                            gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
+                            break;
                     case '4':
-                        gMinVectorSizeIndex = 3;
-                        gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
-                        break;
+                            gMinVectorSizeIndex = 3;
+                            gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
+                            break;
                     case '8':
-                        gMinVectorSizeIndex = 4;
-                        gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
+                            gMinVectorSizeIndex = 4;
+                            gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
+                            break;
-                        vlog(" <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg);
+                        vlog( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
                         return -1;
-        if (!optionFound)
+        if( ! optionFound )
             char *t = NULL;
-            long number = strtol(arg, &t, 0);
-            if (t != arg)
+            long number = strtol( arg, &t, 0 );
+            if( t != arg )
-                if (-1 == gStartTestNumber)
-                    gStartTestNumber = (int32_t)number;
+                if( 0 == gStartTestNumber )
+                    gStartTestNumber = (int32_t) number;
-                    gEndTestNumber = gStartTestNumber + (int32_t)number;
+                    gEndTestNumber = gStartTestNumber + (int32_t) number;
                 // Make sure this is a valid name
                 unsigned int k;
-                for (k = 0; k < functionListCount; k++)
+                for (k=0; k<functionListCount; k++)
-                    const Func *f = functionList + k;
+                    const Func *f = functionList+k;
                     if (strcmp(arg, f->name) == 0)
-                        gTestNames.push_back(arg);
+                        gTestNames[ gTestNameCount ] = arg;
+                        gTestNameCount++;
                 // If we didn't find it in the list of test names
                 if (k >= functionListCount)
-                    gTestNames.push_back(arg);
+                    gTestNames[gTestNameCount] = arg;
+                    gTestNameCount++;
     // Check for the wimpy mode environment variable
-    if (getenv("CL_WIMPY_MODE"))
-    {
-        vlog("\n");
-        vlog("*** Detected CL_WIMPY_MODE env                          ***\n");
-        gWimpyMode = 1;
+    if (getenv("CL_WIMPY_MODE")) {
+      vlog( "\n" );
+      vlog( "*** Detected CL_WIMPY_MODE env                          ***\n" );
+      gWimpyMode = 1;
-    vlog("\nTest binary built %s %s\n", __DATE__, __TIME__);
+    vlog( "\nTest binary built %s %s\n", __DATE__, __TIME__ );
-    if (gWimpyMode)
+    if( gWimpyMode )
-        vlog("\n");
-        vlog("*** WARNING: Testing in Wimpy mode!                     ***\n");
-        vlog("*** Wimpy mode is not sufficient to verify correctness. ***\n");
-        vlog("*** Wimpy Reduction Factor: %-27u ***\n\n",
-             gWimpyReductionFactor);
+        vlog( "\n" );
+        vlog( "*** WARNING: Testing in Wimpy mode!                     ***\n" );
+        vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
+        vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor );
-    if (singleThreaded) SetThreadCount(1);
+    if( singleThreaded )
+        SetThreadCount(1);
     return 0;
-static void PrintFunctions(void)
+static void PrintFunctions ( void )
-    vlog("\nMath function names:\n");
-    for (int i = 0; i < functionListCount; i++)
-    {
-        vlog("\t%s\n", functionList[i].name);
-    }
+  vlog( "\nMath function names:\n" );
+  for( int i = 0; i < functionListCount; i++ )
+  {
+    vlog( "\t%s\n", functionList[ i ].name );
+  }
-static void PrintUsage(void)
+static void PrintUsage( void )
-    vlog("%s [-cglsz]: <optional: math function names>\n", appName);
-    vlog("\toptions:\n");
-    vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: "
-         "off)\n");
-    vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
-         "on)\n");
-    vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
-    vlog("\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n");
-    vlog("\t\t-e\tToggle test as derived implementations for fast relaxed math "
-         "precision. (Default: on)\n");
-    vlog("\t\t-h\tPrint this message and quit\n");
-    vlog("\t\t-p\tPrint all math function names and quit\n");
-    vlog("\t\t-l\tlink check only (make sure functions are present, skip "
-         "accuracy checks.)\n");
-    vlog("\t\t-m\tToggle run multi-threaded. (Default: on) )\n");
-    vlog("\t\t-s\tStop on error\n");
-    vlog("\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
-    vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
-         "1-10, default factor(%u)\n",
-         gWimpyReductionFactor);
-    vlog("\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by "
-         "device capabilities by default.)\n");
-    vlog("\t\t-v\tToggle Verbosity (Default: off)\n ");
-    vlog("\t\t-#\tTest only vector sizes #, e.g. \"-1\" tests scalar only, "
-         "\"-16\" tests 16-wide vectors only.\n");
-    vlog("\n\tYou may also pass a number instead of a function name.\n");
-    vlog("\tThis causes the first N tests to be skipped. The tests are "
-         "numbered.\n");
-    vlog("\tIf you pass a second number, that is the number tests to run after "
-         "the first one.\n");
-    vlog("\tA name list may be used in conjunction with a number range. In "
-         "that case,\n");
-    vlog("\tonly the named cases in the number range will run.\n");
-    vlog("\tYou may also choose to pass no arguments, in which case all tests "
-         "will be run.\n");
-    vlog("\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the "
-         "device.\n");
-    vlog("\n");
+    vlog( "%s [-acglstz]: <optional: math function names>\n", appName );
+    vlog( "\toptions:\n" );
+    vlog( "\t\t-a\tReport average times instead of best times\n" );
+    vlog( "\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: off)\n");
+    vlog( "\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 on)\n" );
+    vlog( "\t\t-f\tToggle float precision testing. (Default: on)\n" );
+    vlog( "\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n" );
+    vlog( "\t\t-e\tToggle test as derived implementations for fast relaxed math precision. (Default: on)\n" );
+    vlog( "\t\t-h\tPrint this message and quit\n" );
+    vlog( "\t\t-p\tPrint all math function names and quit\n" );
+    vlog( "\t\t-l\tlink check only (make sure functions are present, skip accuracy checks.)\n" );
+    vlog( "\t\t-m\tToggle run multi-threaded. (Default: on) )\n" );
+    vlog( "\t\t-s\tStop on error\n" );
+    vlog( "\t\t-t\tToggle timing  (on by default)\n" );
+    vlog( "\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
+    vlog( "\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-10, default factor(%u)\n",gWimpyReductionFactor );
+    vlog( "\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by device capabilities by default.)\n" );
+    vlog( "\t\t-v\tToggle Verbosity (Default: off)\n ");
+    vlog( "\t\t-#\tTest only vector sizes #, e.g. \"-1\" tests scalar only, \"-16\" tests 16-wide vectors only.\n" );
+    vlog( "\n\tYou may also pass a number instead of a function name.\n" );
+    vlog( "\tThis causes the first N tests to be skipped. The tests are numbered.\n" );
+    vlog( "\tIf you pass a second number, that is the number tests to run after the first one.\n" );
+    vlog( "\tA name list may be used in conjunction with a number range. In that case,\n" );
+    vlog( "\tonly the named cases in the number range will run.\n" );
+    vlog( "\tYou may also choose to pass no arguments, in which case all tests will be run.\n" );
+    vlog( "\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the device.\n" );
+    vlog( "\n" );
-static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo,
-                                                   const void *private_info,
-                                                   size_t cb, void *user_data)
+static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
-    vlog("%s  (%p, %zd, %p)\n", errinfo, private_info, cb, user_data);
+    vlog( "%s  (%p, %zd, %p)\n", errinfo, private_info, cb, user_data );
-test_status InitCL(cl_device_id device)
+test_status InitCL( cl_device_id device )
     int error;
     uint32_t i;
+    size_t configSize = sizeof( gComputeDevices );
     cl_device_type device_type;
-    error = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type),
-                            &device_type, NULL);
-    if (error)
+    error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL );
+    if( error )
-        print_error(error, "Unable to get device type");
+        print_error( error, "Unable to get device type" );
         return TEST_FAIL;
     gDevice = device;
+    if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_COMPUTE_UNITS, configSize, &gComputeDevices, NULL )) )
+        gComputeDevices = 1;
     // Check extensions
-    if (is_extension_available(gDevice, "cl_khr_fp64"))
+    if(is_extension_available(gDevice, "cl_khr_fp64"))
         gHasDouble ^= 1;
-        cl_device_fp_config doubleCapabilities = 0;
-        if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG,
-                                     sizeof(doubleCapabilities),
-                                     &doubleCapabilities, NULL)))
+        if( (error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(gDoubleCapabilities), &gDoubleCapabilities, NULL)))
-            vlog_error("ERROR: Unable to get device "
-                       "CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n",
-                       error);
+            vlog_error( "ERROR: Unable to get device CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n", error );
             return TEST_FAIL;
-            != (doubleCapabilities & DOUBLE_REQUIRED_FEATURES))
             std::string list;
-            if (0 == (doubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, ";
-            if (0 == (doubleCapabilities & CL_FP_ROUND_TO_NEAREST))
+            if (0 == (gDoubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, ";
+            if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_NEAREST) )
                 list += "CL_FP_ROUND_TO_NEAREST, ";
-            if (0 == (doubleCapabilities & CL_FP_ROUND_TO_ZERO))
+            if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_ZERO) )
                 list += "CL_FP_ROUND_TO_ZERO, ";
-            if (0 == (doubleCapabilities & CL_FP_ROUND_TO_INF))
+            if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_INF) )
                 list += "CL_FP_ROUND_TO_INF, ";
-            if (0 == (doubleCapabilities & CL_FP_INF_NAN))
+            if( 0 == (gDoubleCapabilities & CL_FP_INF_NAN) )
                 list += "CL_FP_INF_NAN, ";
-            if (0 == (doubleCapabilities & CL_FP_DENORM))
+            if( 0 == (gDoubleCapabilities & CL_FP_DENORM) )
                 list += "CL_FP_DENORM, ";
             vlog_error("ERROR: required double features are missing: %s\n",
@@ -632,102 +1140,100 @@
             return TEST_FAIL;
-        vlog_error("FAIL: device says it supports cl_khr_fp64 but "
-                   "CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n");
+        vlog_error( "FAIL: device says it supports cl_khr_fp64 but CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n" );
         return TEST_FAIL;
-    uint32_t deviceFrequency = 0;
-    size_t configSize = sizeof(deviceFrequency);
-    if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY,
-                                 configSize, &deviceFrequency, NULL)))
-        deviceFrequency = 0;
+    configSize = sizeof( gDeviceFrequency );
+    if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY, configSize, &gDeviceFrequency, NULL )) )
+        gDeviceFrequency = 0;
-    if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG,
-                                 sizeof(gFloatCapabilities),
-                                 &gFloatCapabilities, NULL)))
+    if( (error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(gFloatCapabilities), &gFloatCapabilities, NULL)))
-        vlog_error(
-            "ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n",
-            error);
+        vlog_error( "ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n", error );
         return TEST_FAIL;
-    gContext = clCreateContext(NULL, 1, &gDevice, bruteforce_notify_callback,
-                               NULL, &error);
-    if (NULL == gContext || error)
+    gContext = clCreateContext( NULL, 1, &gDevice, bruteforce_notify_callback, NULL, &error );
+    if( NULL == gContext || error )
-        vlog_error("clCreateContext failed. (%d) \n", error);
+        vlog_error( "clCreateContext failed. (%d) \n", error );
         return TEST_FAIL;
     gQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
-    if (NULL == gQueue || error)
+    if( NULL == gQueue || error )
-        vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+        vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
         return TEST_FAIL;
-    // Allocate buffers
+#if defined( __APPLE__ )
+    // FIXME: use clProtectedArray
+    //Allocate buffers
     cl_uint min_alignment = 0;
-    error = clGetDeviceInfo(gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
-                            sizeof(cl_uint), (void *)&min_alignment, NULL);
+    error = clGetDeviceInfo (gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), (void*)&min_alignment, NULL);
     if (CL_SUCCESS != error)
-        vlog_error("clGetDeviceInfo failed. (%d)\n", error);
+        vlog_error( "clGetDeviceInfo failed. (%d)\n", error );
         return TEST_FAIL;
-    min_alignment >>= 3; // convert bits to bytes
+    min_alignment >>= 3;    // convert bits to bytes
-    gIn = align_malloc(BUFFER_SIZE, min_alignment);
-    if (NULL == gIn) return TEST_FAIL;
-    gIn2 = align_malloc(BUFFER_SIZE, min_alignment);
-    if (NULL == gIn2) return TEST_FAIL;
-    gIn3 = align_malloc(BUFFER_SIZE, min_alignment);
-    if (NULL == gIn3) return TEST_FAIL;
-    gOut_Ref = align_malloc(BUFFER_SIZE, min_alignment);
-    if (NULL == gOut_Ref) return TEST_FAIL;
-    gOut_Ref2 = align_malloc(BUFFER_SIZE, min_alignment);
-    if (NULL == gOut_Ref2) return TEST_FAIL;
+    gIn   = align_malloc( BUFFER_SIZE, min_alignment );
+    if( NULL == gIn )
+        return TEST_FAIL;
+    gIn2   = align_malloc( BUFFER_SIZE, min_alignment );
+    if( NULL == gIn2 )
+        return TEST_FAIL;
+    gIn3   = align_malloc( BUFFER_SIZE, min_alignment );
+    if( NULL == gIn3 )
+        return TEST_FAIL;
+    gOut_Ref   = align_malloc( BUFFER_SIZE, min_alignment );
+    if( NULL == gOut_Ref )
+        return TEST_FAIL;
+    gOut_Ref2   = align_malloc( BUFFER_SIZE, min_alignment );
+    if( NULL == gOut_Ref2 )
+        return TEST_FAIL;
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
-        gOut[i] = align_malloc(BUFFER_SIZE, min_alignment);
-        if (NULL == gOut[i]) return TEST_FAIL;
-        gOut2[i] = align_malloc(BUFFER_SIZE, min_alignment);
-        if (NULL == gOut2[i]) return TEST_FAIL;
+        gOut[i] = align_malloc( BUFFER_SIZE, min_alignment );
+        if( NULL == gOut[i] )
+            return TEST_FAIL;
+        gOut2[i] = align_malloc( BUFFER_SIZE, min_alignment );
+        if( NULL == gOut2[i] )
+            return TEST_FAIL;
     cl_mem_flags device_flags = CL_MEM_READ_ONLY;
     // save a copy on the host device to make this go faster
-    if (CL_DEVICE_TYPE_CPU == device_type)
+    if( CL_DEVICE_TYPE_CPU == device_type )
         device_flags |= CL_MEM_USE_HOST_PTR;
-    else
-        device_flags |= CL_MEM_COPY_HOST_PTR;
+      else
+          device_flags |= CL_MEM_COPY_HOST_PTR;
     // setup input buffers
-    gInBuffer =
-        clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn, &error);
-    if (gInBuffer == NULL || error)
+    gInBuffer = clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn, &error);
+    if( gInBuffer == NULL || error )
-        vlog_error("clCreateBuffer1 failed for input (%d)\n", error);
+        vlog_error( "clCreateBuffer1 failed for input (%d)\n", error );
         return TEST_FAIL;
-    gInBuffer2 =
-        clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn2, &error);
-    if (gInBuffer2 == NULL || error)
+    gInBuffer2 = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gIn2, &error );
+    if( gInBuffer2 == NULL || error )
-        vlog_error("clCreateBuffer2 failed for input (%d)\n", error);
+        vlog_error( "clCreateArray2 failed for input (%d)\n" , error );
         return TEST_FAIL;
-    gInBuffer3 =
-        clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn3, &error);
-    if (gInBuffer3 == NULL || error)
+    gInBuffer3 = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gIn3, &error );
+    if( gInBuffer3 == NULL  || error)
-        vlog_error("clCreateBuffer3 failed for input (%d)\n", error);
+        vlog_error( "clCreateArray3 failed for input (%d)\n", error );
         return TEST_FAIL;
@@ -735,40 +1241,38 @@
     // setup output buffers
     device_flags = CL_MEM_READ_WRITE;
     // save a copy on the host device to make this go faster
-    if (CL_DEVICE_TYPE_CPU == device_type)
+    if( CL_DEVICE_TYPE_CPU == device_type )
         device_flags |= CL_MEM_USE_HOST_PTR;
-    else
-        device_flags |= CL_MEM_COPY_HOST_PTR;
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+      else
+          device_flags |= CL_MEM_COPY_HOST_PTR;
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
-        gOutBuffer[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
-                                       gOut[i], &error);
-        if (gOutBuffer[i] == NULL || error)
+        gOutBuffer[i] = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gOut[i], &error );
+        if( gOutBuffer[i] == NULL || error )
-            vlog_error("clCreateBuffer failed for output (%d)\n", error);
+            vlog_error( "clCreateArray failed for output (%d)\n", error  );
             return TEST_FAIL;
-        gOutBuffer2[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
-                                        gOut2[i], &error);
-        if (gOutBuffer2[i] == NULL || error)
+        gOutBuffer2[i] = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gOut2[i], &error );
+        if( gOutBuffer2[i] == NULL || error)
-            vlog_error("clCreateBuffer2 failed for output (%d)\n", error);
+            vlog_error( "clCreateArray2 failed for output (%d)\n", error );
             return TEST_FAIL;
     // we are embedded, check current rounding mode
-    if (gIsEmbedded)
+    if( gIsEmbedded )
         gIsInRTZMode = IsInRTZMode();
-    // Check tininess detection
+    //Check tininess detection
     cl_platform_id platform;
     int err = clGetPlatformIDs(1, &platform, NULL);
-    if (err)
+    if( err )
         print_error(err, "clGetPlatformIDs failed");
         return TEST_FAIL;
@@ -776,97 +1280,78 @@
     char c[1024];
     static const char *no_yes[] = { "NO", "YES" };
-    vlog("\nCompute Device info:\n");
+    vlog( "\nCompute Device info:\n" );
     clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(c), &c, NULL);
-    vlog("\tPlatform Version: %s\n", c);
+    vlog( "\tPlatform Version: %s\n", c );
     clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(c), &c, NULL);
-    vlog("\tDevice Name: %s\n", c);
+    vlog( "\tDevice Name: %s\n", c );
     clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(c), &c, NULL);
-    vlog("\tVendor: %s\n", c);
+    vlog( "\tVendor: %s\n", c );
     clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(c), &c, NULL);
-    vlog("\tDevice Version: %s\n", c);
+    vlog( "\tDevice Version: %s\n", c );
     clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL);
-    vlog("\tCL C Version: %s\n", c);
+    vlog( "\tCL C Version: %s\n", c );
     clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(c), &c, NULL);
-    vlog("\tDriver Version: %s\n", c);
-    vlog("\tDevice Frequency: %d MHz\n", deviceFrequency);
-    vlog("\tSubnormal values supported for floats? %s\n",
-         no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)]);
-    vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n",
-         no_yes[0
-                != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
-    if (gToggleCorrectlyRoundedDivideSqrt)
+    vlog( "\tDriver Version: %s\n", c );
+    vlog( "\tDevice Frequency: %d MHz\n", gDeviceFrequency );
+    vlog( "\tSubnormal values supported for floats? %s\n", no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)] );
+    vlog( "\tCorrectly rounded divide and sqrt supported for floats? %s\n", no_yes[0 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)] );
+    if( gToggleCorrectlyRoundedDivideSqrt )
         gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
-    vlog("\tTesting with correctly rounded float divide and sqrt? %s\n",
-         no_yes[0
-                != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
-    vlog("\tTesting with FTZ mode ON for floats? %s\n",
-         no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]);
-    vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]);
-    vlog("\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed]);
-    if (gTestFastRelaxed)
+    vlog( "\tTesting with correctly rounded float divide and sqrt? %s\n", no_yes[0 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)] );
+    vlog( "\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)] );
+    vlog( "\tTesting single precision? %s\n", no_yes[0 != gTestFloat] );
+    vlog( "\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed] );
+    if(gTestFastRelaxed)
-        vlog("\tFast relaxed math has derived implementations? %s\n",
-             no_yes[0 != gFastRelaxedDerived]);
+      vlog( "\tFast relaxed math has derived implementations? %s\n", no_yes[0 != gFastRelaxedDerived] );
-    vlog("\tTesting double precision? %s\n", no_yes[0 != gHasDouble]);
-    if (sizeof(long double) == sizeof(double) && gHasDouble)
+    vlog( "\tTesting double precision? %s\n", no_yes[0 != gHasDouble] );
+    if( sizeof( long double) == sizeof( double ) && gHasDouble )
-        vlog("\n\t\tWARNING: Host system long double does not have better "
-             "precision than double!\n");
-        vlog("\t\t         All double results that do not match the reference "
-             "result have their reported\n");
-        vlog("\t\t         error inflated by 0.5 ulps to account for the fact "
-             "that this system\n");
-        vlog("\t\t         can not accurately represent the right result to an "
-             "accuracy closer\n");
-        vlog("\t\t         than half an ulp. See comments in "
-             "Bruteforce_Ulp_Error_Double() for more details.\n\n");
+        vlog( "\n\t\tWARNING: Host system long double does not have better precision than double!\n" );
+        vlog( "\t\t         All double results that do not match the reference result have their reported\n" );
+        vlog( "\t\t         error inflated by 0.5 ulps to account for the fact that this system\n" );
+        vlog( "\t\t         can not accurately represent the right result to an accuracy closer\n" );
+        vlog( "\t\t         than half an ulp. See comments in Bruteforce_Ulp_Error_Double() for more details.\n\n" );
-    vlog("\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded]);
-    if (gIsEmbedded)
-        vlog("\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode]);
-    vlog("\tTininess is detected before rounding? %s\n",
-         no_yes[0 != gCheckTininessBeforeRounding]);
-    vlog("\tWorker threads: %d\n", GetThreadCount());
-    vlog("\tTesting vector sizes:");
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-        vlog("\t%d", sizeValues[i]);
+    vlog( "\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded] );
+    if( gIsEmbedded )
+        vlog( "\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode] );
+    vlog( "\tTininess is detected before rounding? %s\n", no_yes[0 != gCheckTininessBeforeRounding] );
+    vlog( "\tWorker threads: %d\n", GetThreadCount() );
+    vlog( "\tTesting vector sizes:" );
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        vlog( "\t%d", sizeValues[i] );
     vlog("\tVerbose? %s\n", no_yes[0 != gVerboseBruteForce]);
-    vlog("\n\n");
+    vlog( "\n\n" );
-    // Check to see if we are using single threaded mode on other than a 1.0
-    // device
-    if (getenv("CL_TEST_SINGLE_THREADED"))
-    {
+    // Check to see if we are using single threaded mode on other than a 1.0 device
+    if (getenv( "CL_TEST_SINGLE_THREADED" )) {
-        char device_version[1024] = { 0 };
-        clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(device_version),
-                        device_version, NULL);
+      char device_version[1024] = { 0 };
+      clGetDeviceInfo( gDevice, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL );
-        if (strcmp("OpenCL 1.0 ", device_version))
-        {
-            vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. "
-                 "Running single threaded.\n");
-        }
+      if (strcmp("OpenCL 1.0 ",device_version)) {
+        vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n");
+      }
     return TEST_PASS;
-static void ReleaseCL(void)
+static void ReleaseCL( void )
     uint32_t i;
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
+    for ( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) {
@@ -879,182 +1364,170 @@
-    for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
-void _LogBuildError(cl_program p, int line, const char *file)
+void _LogBuildError( cl_program p, int line, const char *file )
     char the_log[2048] = "";
-    vlog_error("%s:%d: Build Log:\n", file, line);
-    if (0
-        == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG,
-                                 sizeof(the_log), the_log, NULL))
-        vlog_error("%s", the_log);
+    vlog_error( "%s:%d: Build Log:\n", file, line );
+    if( 0 == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(the_log), the_log, NULL) )
+        vlog_error( "%s", the_log );
-        vlog_error("*** Error getting build log for program %p\n", p);
+        vlog_error( "*** Error getting build log for program %p\n", p );
-int InitILogbConstants(void)
+int InitILogbConstants( void )
     int error;
-    const char *kernelSource =
-        R"(__kernel void GetILogBConstants( __global int *out )
-        {
-            out[0] = FP_ILOGB0;
-            out[1] = FP_ILOGBNAN;
-        })";
+    const char *kernel =
+    "__kernel void GetILogBConstants( __global int *out )\n"
+    "{\n"
+    "   out[0] = FP_ILOGB0;\n"
+    "   out[1] = FP_ILOGBNAN;\n"
+    "}\n";
-    clProgramWrapper query;
-    clKernelWrapper kernel;
-    error = create_single_kernel_helper(gContext, &query, &kernel, 1,
-                                        &kernelSource, "GetILogBConstants");
-    if (error != CL_SUCCESS)
+    cl_program query;
+    error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL);
+    if (NULL == query || error)
-        vlog_error("Error: Unable to create kernel to get FP_ILOGB0 and "
-                   "FP_ILOGBNAN for the device. (%d)",
-                   error);
+        vlog_error( "Error: Unable to create program to get FP_ILOGB0 and FP_ILOGBNAN for the device. (%d)", error );
         return error;
-    if ((error =
-             clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
-                            &gOutBuffer[gMinVectorSizeIndex])))
+    cl_kernel k = clCreateKernel( query, "GetILogBConstants", &error );
+    if( NULL == k || error)
-        vlog_error("Error: Unable to set kernel arg to get FP_ILOGB0 and "
-                   "FP_ILOGBNAN for the device. Err = %d",
-                   error);
+      vlog_error( "Error: Unable to create kernel to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error );
+        return error;
+    }
+    if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex])))
+    {
+        vlog_error( "Error: Unable to set kernel arg to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error );
         return error;
     size_t dim = 1;
-    if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
-                                        NULL, NULL)))
+    if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) ))
-        vlog_error("Error: Unable to execute kernel to get FP_ILOGB0 and "
-                   "FP_ILOGBNAN for the device. Err = %d",
-                   error);
+        vlog_error( "Error: Unable to execute kernel to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error );
         return error;
-    struct
+    struct{ cl_int ilogb0, ilogbnan; }data;
+    if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL)))
-        cl_int ilogb0, ilogbnan;
-    } data;
-    if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
-                                     CL_TRUE, 0, sizeof(data), &data, 0, NULL,
-                                     NULL)))
-    {
-        vlog_error("Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the "
-                   "device. Err = %d",
-                   error);
+        vlog_error( "Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the device. Err = %d", error );
         return error;
     gDeviceILogb0 = data.ilogb0;
     gDeviceILogbNaN = data.ilogbnan;
+    clReleaseKernel(k);
+    clReleaseProgram(query);
     return 0;
-int IsTininessDetectedBeforeRounding(void)
+int IsTininessDetectedBeforeRounding( void )
     int error;
-    const char *kernelSource =
-        R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out )
-        {
-           volatile float a = 0x1.000002p-126f;
-           volatile float b = 0x1.fffffcp-1f;
-           out[0] = a * b; // product is 0x1.fffffffffff8p-127
-        })";
+    const char *kernel =
+    "__kernel void IsTininessDetectedBeforeRounding( __global float *out )\n"
+    "{\n"
+    "   volatile float a = 0x1.000002p-126f;\n"
+    "   volatile float b = 0x1.fffffcp-1f;\n"       // product is 0x1.fffffffffff8p-127
+    "   out[0] = a * b;\n"
+    "}\n";
-    clProgramWrapper query;
-    clKernelWrapper kernel;
-    error =
-        create_single_kernel_helper(gContext, &query, &kernel, 1, &kernelSource,
-                                    "IsTininessDetectedBeforeRounding");
-    if (error != CL_SUCCESS)
-    {
-        vlog_error("Error: Unable to create kernel to detect how tininess is "
-                   "detected for the device. (%d)",
-                   error);
+    cl_program query;
+    error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL);
+    if (error != CL_SUCCESS) {
+        vlog_error( "Error: Unable to create program to detect how tininess is detected for the device. (%d)", error );
         return error;
-    if ((error =
-             clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
-                            &gOutBuffer[gMinVectorSizeIndex])))
+    cl_kernel k = clCreateKernel( query, "IsTininessDetectedBeforeRounding", &error );
+    if( NULL == k || error)
-        vlog_error("Error: Unable to set kernel arg to detect how tininess is "
-                   "detected  for the device. Err = %d",
-                   error);
+      vlog_error( "Error: Unable to create kernel to detect how tininess is detected  for the device. Err = %d", error );
+        return error;
+    }
+    if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex])))
+    {
+        vlog_error( "Error: Unable to set kernel arg to detect how tininess is detected  for the device. Err = %d", error );
         return error;
     size_t dim = 1;
-    if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
-                                        NULL, NULL)))
+    if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) ))
-        vlog_error("Error: Unable to execute kernel to detect how tininess is "
-                   "detected  for the device. Err = %d",
-                   error);
+        vlog_error( "Error: Unable to execute kernel to detect how tininess is detected  for the device. Err = %d", error );
         return error;
-    struct
+    struct{ cl_uint f; }data;
+    if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL)))
-        cl_uint f;
-    } data;
-    if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
-                                     CL_TRUE, 0, sizeof(data), &data, 0, NULL,
-                                     NULL)))
-    {
-        vlog_error("Error: unable to read result from tininess test from the "
-                   "device. Err = %d",
-                   error);
+        vlog_error( "Error: unable to read result from tininess test from the device. Err = %d", error );
         return error;
     gCheckTininessBeforeRounding = 0 == (data.f & 0x7fffffff);
+    clReleaseKernel(k);
+    clReleaseProgram(query);
     return 0;
-int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
-               cl_program *p, bool relaxedMode)
+int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p )
     int error = 0;
     char options[200] = "";
-    if (gForceFTZ)
+    if( gForceFTZ )
-        strcat(options, " -cl-denorms-are-zero");
+      strcat(options," -cl-denorms-are-zero");
-    if (relaxedMode)
+    if( gTestFastRelaxed )
-        strcat(options, " -cl-fast-relaxed-math");
+      strcat(options, " -cl-fast-relaxed-math");
-    error =
-        create_single_kernel_helper(gContext, p, k, count, c, name, options);
+    error = create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
     if (error != CL_SUCCESS)
-        vlog_error("\t\tFAILED -- Failed to create kernel. (%d)\n", error);
+        vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
+        return error;
+    }
+    *k = clCreateKernel( *p, name, &error );
+    if( NULL == *k || error )
+    {
+        char    buffer[2048] = "";
+        vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
+        clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL);
+        vlog_error("Log: %s\n", buffer);
+        clReleaseProgram( *p );
         return error;
     return error;
-int MakeKernels(const char **c, cl_uint count, const char *name,
-                cl_uint kernel_count, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
+int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p )
     int error = 0;
     cl_uint i;
@@ -1062,41 +1535,39 @@
     if (gForceFTZ)
-        strcat(options, " -cl-denorms-are-zero ");
+      strcat(options," -cl-denorms-are-zero ");
-    if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
+    if( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT )
-        strcat(options, " -cl-fp32-correctly-rounded-divide-sqrt ");
+      strcat(options," -cl-fp32-correctly-rounded-divide-sqrt ");
-    if (relaxedMode)
+    if( gTestFastRelaxed )
-        strcat(options, " -cl-fast-relaxed-math");
+      strcat(options, " -cl-fast-relaxed-math");
-    error =
-        create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
-    if (error != CL_SUCCESS)
+    error = create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
+    if ( error != CL_SUCCESS )
-        vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
+        vlog_error( "\t\tFAILED -- Failed to create program. (%d)\n", error );
         return error;
-    memset(k, 0, kernel_count * sizeof(*k));
-    for (i = 0; i < kernel_count; i++)
+    memset( k, 0, kernel_count * sizeof( *k) );
+    for( i = 0; i< kernel_count; i++ )
-        k[i] = clCreateKernel(*p, name, &error);
-        if (NULL == k[i] || error)
+        k[i] = clCreateKernel( *p, name, &error );
+        if( NULL == k[i]|| error )
-            char buffer[2048] = "";
+            char    buffer[2048] = "";
             vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
-            clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG,
-                                  sizeof(buffer), buffer, NULL);
+            clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL);
             vlog_error("Log: %s\n", buffer);
-            clReleaseProgram(*p);
+            clReleaseProgram( *p );
             return error;
@@ -1105,116 +1576,99 @@
-static int IsInRTZMode(void)
+static int IsInRTZMode( void )
     int error;
-    const char *kernelSource =
-        R"(__kernel void GetRoundingMode( __global int *out )
-        {
-            volatile float a = 0x1.0p23f;
-            volatile float b = -0x1.0p23f;
-            out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b);
-        })";
+    const char *kernel =
+    "__kernel void GetRoundingMode( __global int *out )\n"
+    "{\n"
+    "   volatile float a = 0x1.0p23f;\n"
+    "   volatile float b = -0x1.0p23f;\n"
+    "   out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b);\n"
+    "}\n";
-    clProgramWrapper query;
-    clKernelWrapper kernel;
-    error = create_single_kernel_helper(gContext, &query, &kernel, 1,
-                                        &kernelSource, "GetRoundingMode");
-    if (error != CL_SUCCESS)
-    {
-        vlog_error("Error: Unable to create kernel to detect RTZ mode for the "
-                   "device. (%d)",
-                   error);
+    cl_program query;
+    error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL);
+    if (error != CL_SUCCESS) {
+        vlog_error( "Error: Unable to create program to detect RTZ mode for the device. (%d)", error );
         return error;
-    if ((error =
-             clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
-                            &gOutBuffer[gMinVectorSizeIndex])))
+    cl_kernel k = clCreateKernel( query, "GetRoundingMode", &error );
+    if( NULL == k || error)
-        vlog_error("Error: Unable to set kernel arg to detect RTZ mode for the "
-                   "device. Err = %d",
-                   error);
+        vlog_error( "Error: Unable to create kernel to gdetect RTZ mode for the device. Err = %d", error );
+        return error;
+    }
+    if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex])))
+    {
+        vlog_error( "Error: Unable to set kernel arg to detect RTZ mode for the device. Err = %d", error );
         return error;
     size_t dim = 1;
-    if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
-                                        NULL, NULL)))
+    if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) ))
-        vlog_error("Error: Unable to execute kernel to detect RTZ mode for the "
-                   "device. Err = %d",
-                   error);
+        vlog_error( "Error: Unable to execute kernel to detect RTZ mode for the device. Err = %d", error );
         return error;
-    struct
+    struct{ cl_int isRTZ; }data;
+    if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL)))
-        cl_int isRTZ;
-    } data;
-    if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
-                                     CL_TRUE, 0, sizeof(data), &data, 0, NULL,
-                                     NULL)))
-    {
-        vlog_error(
-            "Error: unable to read RTZ mode data from the device. Err = %d",
-            error);
+        vlog_error( "Error: unable to read RTZ mode data from the device. Err = %d", error );
         return error;
+    clReleaseKernel(k);
+    clReleaseProgram(query);
     return data.isRTZ;
 #pragma mark -
-const char *sizeNames[VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" };
-const int sizeValues[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
+const char *sizeNames[ VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" };
+const int  sizeValues[ VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
-// TODO: There is another version of Ulp_Error_Double defined in
-// test_common/harness/errorHelpers.c
-float Bruteforce_Ulp_Error_Double(double test, long double reference)
+// TODO: There is another version of Ulp_Error_Double defined in test_common/harness/errorHelpers.c
+float Bruteforce_Ulp_Error_Double( double test, long double reference )
-    // Check for Non-power-of-two and NaN
+//Check for Non-power-of-two and NaN
-    // Note: This function presumes that someone has already tested whether the
-    // result is correctly, rounded before calling this function.  That test:
-    //
-    //    if( (float) reference == test )
-    //        return 0.0f;
-    //
-    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out
-    // before we get here. Otherwise, we'll return inf ulp error here, for what
-    // are otherwise correctly rounded results.
+  // Note: This function presumes that someone has already tested whether the result is correctly,
+  // rounded before calling this function.  That test:
+  //
+  //    if( (float) reference == test )
+  //        return 0.0f;
+  //
+  // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
+  // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+  // results.
-    // Deal with long double = double
-    // On most systems long double is a higher precision type than double. They
-    // provide either a 80-bit or greater floating point type, or they provide a
-    // head-tail double double format. That is sufficient to represent the
-    // accuracy of a floating point result to many more bits than double and we
-    // can calculate sub-ulp errors. This is the standard system for which this
-    // test suite is designed.
-    //
-    // On some systems double and long double are the same thing. Then we run
-    // into a problem, because our representation of the infinitely precise
-    // result (passed in as reference above) can be off by as much as a half
-    // double precision ulp itself.  In this case, we inflate the reported error
-    // by half an ulp to take this into account.  A more correct and permanent
-    // fix would be to undertake refactoring the reference code to return
-    // results in this format:
-    //
-    //    typedef struct DoubleReference
-    //    { // true value = correctlyRoundedResult + ulps *
-    //    ulp(correctlyRoundedResult)        (infinitely precise)
-    //        double  correctlyRoundedResult;     // as best we can
-    //        double  ulps;                       // plus a fractional amount to
-    //        account for the difference
-    //    }DoubleReference;                       //     between infinitely
-    //    precise result and correctlyRoundedResult, in units of ulps.
-    //
-    // This would provide a useful higher-than-double precision format for
-    // everyone that we can use, and would solve a few problems with
-    // representing absolute errors below DBL_MIN and over DBL_MAX for systems
-    // that use a head to tail double double for long double.
+  // Deal with long double = double
+  // On most systems long double is a higher precision type than double. They provide either
+  // a 80-bit or greater floating point type, or they provide a head-tail double double format.
+  // That is sufficient to represent the accuracy of a floating point result to many more bits
+  // than double and we can calculate sub-ulp errors. This is the standard system for which this
+  // test suite is designed.
+  //
+  // On some systems double and long double are the same thing. Then we run into a problem,
+  // because our representation of the infinitely precise result (passed in as reference above)
+  // can be off by as much as a half double precision ulp itself.  In this case, we inflate the
+  // reported error by half an ulp to take this into account.  A more correct and permanent fix
+  // would be to undertake refactoring the reference code to return results in this format:
+  //
+  //    typedef struct DoubleReference
+  //    { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult)        (infinitely precise)
+  //        double  correctlyRoundedResult;     // as best we can
+  //        double  ulps;                       // plus a fractional amount to account for the difference
+  //    }DoubleReference;                       //     between infinitely precise result and correctlyRoundedResult, in units of ulps.
+  //
+  // This would provide a useful higher-than-double precision format for everyone that we can use,
+  // and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems
+  // that use a head to tail double double for long double.
     int x;
     long double testVal = test;
@@ -1222,73 +1676,166 @@
     // First, handle special reference values
     if (isinf(reference))
-        if (reference == testVal) return 0.0f;
+    if (reference == testVal)
+        return 0.0f;
-        return INFINITY;
+    return INFINITY;
     if (isnan(reference))
-        if (isnan(testVal)) return 0.0f;
+    if (isnan(testVal))
+        return 0.0f;
-        return INFINITY;
+    return INFINITY;
-    if (0.0L != reference && 0.5L != frexpl(reference, &x))
+    if ( 0.0L != reference && 0.5L != frexpl(reference, &x) )
     { // Non-zero and Non-power of two
-        // allow correctly rounded results to pass through unmolested. (We might
-        // add error to it below.) There is something of a performance
-        // optimization here.
-        if (testVal == reference) return 0.0f;
+       // allow correctly rounded results to pass through unmolested. (We might add error to it below.)
+       // There is something of a performance optimization here.
+        if( testVal == reference )
+            return 0.0f;
         // The unbiased exponent of the ulp unit place
-        int ulp_exp =
-            DBL_MANT_DIG - 1 - MAX(ilogbl(reference), DBL_MIN_EXP - 1);
+        int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 );
         // Scale the exponent of the error
-        float result = (float)scalbnl(testVal - reference, ulp_exp);
+        float result = (float) scalbnl( testVal - reference, ulp_exp );
-        // account for rounding error in reference result on systems that do not
-        // have a higher precision floating point type (see above)
-        if (sizeof(long double) == sizeof(double))
-            result += copysignf(0.5f, result);
+        // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
+        if( sizeof(long double) == sizeof( double ) )
+            result += copysignf( 0.5f, result);
         return result;
     // reference is a normal power of two or a zero
     // The unbiased exponent of the ulp unit place
-    int ulp_exp =
-        DBL_MANT_DIG - 1 - MAX(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
+    int ulp_exp =  DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 );
-    // allow correctly rounded results to pass through unmolested. (We might add
-    // error to it below.) There is something of a performance optimization here
-    // too.
-    if (testVal == reference) return 0.0f;
+   // allow correctly rounded results to pass through unmolested. (We might add error to it below.)
+   // There is something of a performance optimization here too.
+    if( testVal == reference )
+        return 0.0f;
     // Scale the exponent of the error
-    float result = (float)scalbnl(testVal - reference, ulp_exp);
+    float result = (float) scalbnl( testVal - reference, ulp_exp );
-    // account for rounding error in reference result on systems that do not
-    // have a higher precision floating point type (see above)
-    if (sizeof(long double) == sizeof(double))
-        result += copysignf(0.5f, result);
+    // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
+    if( sizeof(long double) == sizeof( double ) )
+        result += copysignf( 0.5f, result);
     return result;
-float Abs_Error(float test, double reference)
+float Abs_Error( float test, double reference )
-    if (isnan(test) && isnan(reference)) return 0.0f;
-    return fabs((float)(reference - (double)test));
+  if( isnan(test) && isnan(reference) )
+    return 0.0f;
+  return fabs((float)(reference-(double)test));
-cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
+#define HALF_MIN_EXP    -13
+#define HALF_MANT_DIG    11
+float Ulp_Error_Half( float test, double reference )
-    if (0 == (x & (x - 1))) return x;
+    union{ double d; uint64_t u; }u;     u.d = reference;
-    while (x & (x - 1)) x &= x - 1;
+  // Note: This function presumes that someone has already tested whether the result is correctly,
+  // rounded before calling this function.  That test:
+  //
+  //    if( (float) reference == test )
+  //        return 0.0f;
+  //
+  // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
+  // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+  // results.
-    return x + x;
+    double testVal = test;
+    if( u.u & 0x000fffffffffffffULL )
+    { // Non-power of two and NaN
+        if( isnan( reference ) && isnan( test ) )
+            return 0.0f;    // if we are expecting a NaN, any NaN is fine
+        // The unbiased exponent of the ulp unit place
+        int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 );
+        // Scale the exponent of the error
+        return (float) scalbn( testVal - reference, ulp_exp );
+    }
+    if( isinf( reference ) )
+    {
+        if( (double) test == reference )
+            return 0.0f;
+        return (float) (testVal - reference );
+    }
+    // reference is a normal power of two or a zero
+    int ulp_exp =  HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 );
+    // Scale the exponent of the error
+    return (float) scalbn( testVal - reference, ulp_exp );
+#if defined( __APPLE__ )
+    #include <mach/mach_time.h>
+uint64_t GetTime( void )
+#if defined( __APPLE__ )
+    return mach_absolute_time();
+#elif defined(_WIN32) && defined(_MSC_VER)
+    return  ReadTime();
+    //mach_absolute_time is a high precision timer with precision < 1 microsecond.
+    #warning need accurate clock here.  Times are invalid.
+    return 0;
+#if defined(_WIN32) && defined (_MSC_VER)
+/* function is defined in "compat.h" */
+double SubtractTime( uint64_t endTime, uint64_t startTime )
+    uint64_t diff = endTime - startTime;
+    static double conversion = 0.0;
+    if( 0.0 == conversion )
+    {
+#if defined( __APPLE__ )
+        mach_timebase_info_data_t info = {0,0};
+        kern_return_t   err = mach_timebase_info( &info );
+        if( 0 == err )
+            conversion = 1e-9 * (double) info.numer / (double) info.denom;
+    // This function consumes output from GetTime() above, and converts the time to secionds.
+    #warning need accurate ticks to seconds conversion factor here. Times are invalid.
+    }
+    // strictly speaking we should also be subtracting out timer latency here
+    return conversion * (double) diff;
+cl_uint RoundUpToNextPowerOfTwo( cl_uint x )
+    if( 0 == (x & (x-1)))
+        return x;
+    while( x & (x-1) )
+        x &= x-1;
+    return x+x;
diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp
index 3a6516b..99c8eb3 100644
--- a/test_conformance/math_brute_force/reference_math.cpp
+++ b/test_conformance/math_brute_force/reference_math.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,55 +13,56 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "reference_math.h"
 #include "harness/compat.h"
-#include <climits>
+#include "reference_math.h"
+#include <limits.h>
 #if !defined(_WIN32)
-#include <cstring>
+#include <string.h>
-#include "utility.h"
+#include "Utility.h"
-#if defined(__SSE__)                                                           \
-    || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
-#include <xmmintrin.h>
+#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64)))
+    #include <xmmintrin.h>
-#if defined(__SSE2__)                                                          \
-    || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
-#include <emmintrin.h>
+#if defined( __SSE2__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64)))
+    #include <emmintrin.h>
 #ifndef M_PI_4
-#define M_PI_4 (M_PI / 4)
+    #define M_PI_4 (M_PI/4)
+#define EVALUATE( x )       x
+#define CONCATENATE(x, y)  x ## EVALUATE(y)
 static void __log2_ep(double *hi, double *lo, double x);
-typedef union {
+typedef union
     uint64_t i;
     double d;
-} uint64d_t;
 static const uint64d_t _CL_NAN = { 0x7ff8000000000000ULL };
 #define cl_make_nan() _CL_NAN.d
-static double reduce1(double x)
+static double reduce1( double x );
+static double reduce1( double x )
-    if (fabs(x) >= HEX_DBL(+, 1, 0, +, 53))
+    if( fabs(x) >= HEX_DBL( +, 1, 0, +, 53 ) )
-        if (fabs(x) == INFINITY) return cl_make_nan();
+        if( fabs(x) == INFINITY )
+            return cl_make_nan();
-        return 0.0; // we patch up the sign for sinPi and cosPi later, since
-                    // they need different signs
+        return 0.0; //we patch up the sign for sinPi and cosPi later, since they need different signs
     // Find the nearest multiple of 2
-    const double r = copysign(HEX_DBL(+, 1, 0, +, 53), x);
+    const double r = copysign( HEX_DBL( +, 1, 0, +, 53 ), x );
     double z = x + r;
     z -= r;
@@ -69,374 +70,382 @@
     return x - z;
-double reference_acospi(double x) { return reference_acos(x) / M_PI; }
-double reference_asinpi(double x) { return reference_asin(x) / M_PI; }
-double reference_atanpi(double x) { return reference_atan(x) / M_PI; }
-double reference_atan2pi(double y, double x)
+static double reduceHalf( double x );
+static double reduceHalf( double x )
-    return reference_atan2(y, x) / M_PI;
-double reference_cospi(double x)
-    if (reference_fabs(x) >= HEX_DBL(+, 1, 0, +, 52))
+    if( fabs(x) >= HEX_DBL( +, 1, 0, +, 52 ) )
-        if (reference_fabs(x) == INFINITY) return cl_make_nan();
+        if( fabs(x) == INFINITY )
+            return cl_make_nan();
-        // Note this probably fails for odd values between 0x1.0p52 and
-        // 0x1.0p53. However, when starting with single precision inputs, there
-        // will be no odd values.
+        return 0.0; //we patch up the sign for sinPi and cosPi later, since they need different signs
+    }
+    // Find the nearest multiple of 1
+    const double r = copysign( HEX_DBL( +, 1, 0, +, 52 ), x );
+    double z = x + r;
+    z -= r;
+    // subtract it from x. Value is now in the range -0.5 <= x <= 0.5
+    return x - z;
+double reference_acospi( double x) {  return reference_acos( x ) / M_PI;    }
+double reference_asinpi( double x) {  return reference_asin( x ) / M_PI;    }
+double reference_atanpi( double x) {  return reference_atan( x ) / M_PI;    }
+double reference_atan2pi( double y, double x ) { return reference_atan2( y, x) / M_PI; }
+double reference_cospi( double x)
+    if( reference_fabs(x) >= HEX_DBL( +, 1, 0, +, 52 ) )
+    {
+        if( reference_fabs(x) == INFINITY )
+            return cl_make_nan();
+        //Note this probably fails for odd values between 0x1.0p52 and 0x1.0p53.
+        //However, when starting with single precision inputs, there will be no odd values.
         return 1.0;
-    x = reduce1(x + 0.5);
+    x = reduce1(x+0.5);
     // reduce to [-0.5, 0.5]
-    if (x < -0.5)
+    if( x < -0.5 )
         x = -1 - x;
-    else if (x > 0.5)
+    else if ( x > 0.5 )
         x = 1 - x;
     // cosPi zeros are all +0
-    if (x == 0.0) return 0.0;
+    if( x == 0.0 )
+        return 0.0;
-    return reference_sin(x * M_PI);
+    return reference_sin( x * M_PI );
-double reference_relaxed_cospi(double x) { return reference_cospi(x); }
+double reference_relaxed_divide( double x, double y ) { return (float)(((float) x ) / ( (float) y )); }
-double reference_relaxed_divide(double x, double y)
-    return (float)(((float)x) / ((float)y));
-double reference_divide(double x, double y) { return x / y; }
+double reference_divide( double x, double y ) { return x / y; }
 // Add a + b. If the result modulo overflowed, write 1 to *carry, otherwise 0
-static inline cl_ulong add_carry(cl_ulong a, cl_ulong b, cl_ulong *carry)
+static inline cl_ulong  add_carry( cl_ulong a, cl_ulong b, cl_ulong *carry )
     cl_ulong result = a + b;
     *carry = result < a;
     return result;
-// Subtract a - b. If the result modulo overflowed, write 1 to *carry, otherwise
-// 0
-static inline cl_ulong sub_carry(cl_ulong a, cl_ulong b, cl_ulong *carry)
+// Subtract a - b. If the result modulo overflowed, write 1 to *carry, otherwise 0
+static inline cl_ulong  sub_carry( cl_ulong a, cl_ulong b, cl_ulong *carry )
     cl_ulong result = a - b;
     *carry = result > a;
     return result;
-static float fallback_frexpf(float x, int *iptr)
+static float fallback_frexpf( float x, int *iptr )
     cl_uint u, v;
     float fu, fv;
-    memcpy(&u, &x, sizeof(u));
+    memcpy( &u, &x, sizeof(u));
-    cl_uint exponent = u & 0x7f800000U;
+    cl_uint exponent = u &  0x7f800000U;
     cl_uint mantissa = u & ~0x7f800000U;
     // add 1 to the exponent
     exponent += 0x00800000U;
-    if ((cl_int)exponent < (cl_int)0x01000000)
+    if( (cl_int) exponent < (cl_int) 0x01000000 )
     { // subnormal, NaN, Inf
         mantissa |= 0x3f000000U;
         v = mantissa & 0xff800000U;
         u = mantissa;
-        memcpy(&fv, &v, sizeof(v));
-        memcpy(&fu, &u, sizeof(u));
+        memcpy( &fv, &v, sizeof(v));
+        memcpy( &fu, &u, sizeof(u));
         fu -= fv;
-        memcpy(&v, &fv, sizeof(v));
-        memcpy(&u, &fu, sizeof(u));
+        memcpy( &v, &fv, sizeof(v));
+        memcpy( &u, &fu, sizeof(u));
-        exponent = u & 0x7f800000U;
+        exponent = u &  0x7f800000U;
         mantissa = u & ~0x7f800000U;
-        *iptr = (exponent >> 23) + (-126 + 1 - 126);
+        *iptr = (exponent >> 23) + (-126 + 1 -126);
         u = mantissa | 0x3f000000U;
-        memcpy(&fu, &u, sizeof(u));
+        memcpy( &fu, &u, sizeof(u));
         return fu;
     *iptr = (exponent >> 23) - 127;
     u = mantissa | 0x3f000000U;
-    memcpy(&fu, &u, sizeof(u));
+    memcpy( &fu, &u, sizeof(u));
     return fu;
-static inline int extractf(float x, cl_uint *mant)
+static inline int extractf( float, cl_uint * );
+static inline int extractf( float x, cl_uint *mant )
-    static float (*frexppf)(float, int *) = NULL;
+    static float (*frexppf)(float, int*) = NULL;
     int e;
     // verify that frexp works properly
-    if (NULL == frexppf)
+    if( NULL == frexppf )
-        if (0.5f == frexpf(HEX_FLT(+, 1, 0, -, 130), &e) && e == -129)
+        if( 0.5f == frexpf( HEX_FLT( +, 1, 0, -, 130 ), &e ) && e == -129 )
             frexppf = frexpf;
             frexppf = fallback_frexpf;
-    *mant = (cl_uint)(HEX_FLT(+, 1, 0, +, 32) * fabsf(frexppf(x, &e)));
+    *mant = (cl_uint) (HEX_FLT( +, 1, 0, +, 32 ) * fabsf( frexppf( x, &e )));
     return e - 1;
-// Shift right by shift bits. Any bits lost on the right side are bitwise OR'd
-// together and ORd into the LSB of the result
-static inline void shift_right_sticky_64(cl_ulong *p, int shift)
+// Shift right by shift bits. Any bits lost on the right side are bitwise OR'd together and ORd into the LSB of the result
+static inline void shift_right_sticky_64( cl_ulong *p, int shift );
+static inline void shift_right_sticky_64( cl_ulong *p, int shift )
     cl_ulong sticky = 0;
     cl_ulong r = *p;
     // C doesn't handle shifts greater than the size of the variable dependably
-    if (shift >= 64)
+    if( shift >= 64 )
         sticky |= (0 != r);
         r = 0;
-        sticky |= (0 != (r << (64 - shift)));
+        sticky |= (0 != (r << (64-shift)));
         r >>= shift;
     *p = r | sticky;
-// Add two 64 bit mantissas. Bits that are below the LSB of the result are OR'd
-// into the LSB of the result
-static inline void add64(cl_ulong *p, cl_ulong c, int *exponent)
+// Add two 64 bit mantissas. Bits that are below the LSB of the result are OR'd into the LSB of the result
+static inline void add64( cl_ulong *p, cl_ulong c, int *exponent );
+static inline void add64( cl_ulong *p, cl_ulong c, int *exponent )
     cl_ulong carry;
     c = add_carry(c, *p, &carry);
-    if (carry)
+    if( carry )
-        carry = c & 1; // set aside sticky bit
-        c >>= 1; // right shift to deal with overflow
-        c |= carry
-            | 0x8000000000000000ULL; // or in carry bit, and sticky bit. The
-                                     // latter is to prevent rounding from
-                                     // believing we are exact half way case
-        *exponent = *exponent + 1; // adjust exponent
+        carry = c & 1;                              // set aside sticky bit
+        c >>= 1;                                    // right shift to deal with overflow
+        c |= carry | 0x8000000000000000ULL;         // or in carry bit, and sticky bit. The latter is to prevent rounding from believing we are exact half way case
+        *exponent = *exponent + 1;                  // adjust exponent
     *p = c;
 // IEEE-754 round to nearest, ties to even rounding
-static float round_to_nearest_even_float(cl_ulong p, int exponent)
+static float round_to_nearest_even_float( cl_ulong p, int exponent );
+static float round_to_nearest_even_float( cl_ulong p, int exponent )
-    union {
-        cl_uint u;
-        cl_float d;
-    } u;
+    union{ cl_uint u; cl_float d;} u;
     // If mantissa is zero, return 0.0f
     if (p == 0) return 0.0f;
     // edges
-    if (exponent > 127)
+    if( exponent > 127 )
-        volatile float r = exponent * CL_FLT_MAX; // signal overflow
+        volatile float r = exponent * CL_FLT_MAX;       // signal overflow
         // attempt to fool the compiler into not optimizing the above line away
-        if (r > CL_FLT_MAX) return INFINITY;
+        if( r > CL_FLT_MAX )
+            return INFINITY;
         return r;
-    if (exponent == -150 && p > 0x8000000000000000ULL)
-        return HEX_FLT(+, 1, 0, -, 149);
-    if (exponent <= -150) return 0.0f;
+    if( exponent == -150 && p > 0x8000000000000000ULL)
+        return HEX_FLT( +, 1, 0, -, 149 );
+    if( exponent <= -150 )       return 0.0f;
-    // Figure out which bits go where
+    //Figure out which bits go where
     int shift = 8 + 32;
-    if (exponent < -126)
+    if( exponent < -126 )
-        shift -= 126 + exponent; // subnormal: shift is not 52
-        exponent = -127; //            set exponent to 0
+        shift -= 126 + exponent;                    // subnormal: shift is not 52
+        exponent = -127;                            //            set exponent to 0
-        p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove
-                                    // it.
+        p &= 0x7fffffffffffffffULL;                 // normal: leading bit is implicit. Remove it.
     // Assemble the double (round toward zero)
-    u.u = (cl_uint)(p >> shift) | ((cl_uint)(exponent + 127) << 23);
+    u.u = (cl_uint)(p >> shift) | ((cl_uint) (exponent + 127) << 23);
     // put a representation of the residual bits into hi
-    p <<= (64 - shift);
+    p <<= (64-shift);
-    // round to nearest, ties to even  based on the unused portion of p
-    if (p < 0x8000000000000000ULL) return u.d;
-    if (p == 0x8000000000000000ULL)
-        u.u += u.u & 1U;
-    else
-        u.u++;
+    //round to nearest, ties to even  based on the unused portion of p
+    if( p < 0x8000000000000000ULL )        return u.d;
+    if( p == 0x8000000000000000ULL )       u.u += u.u & 1U;
+    else                                   u.u++;
     return u.d;
-static float round_to_nearest_even_float_ftz(cl_ulong p, int exponent)
+static float round_to_nearest_even_float_ftz( cl_ulong p, int exponent );
+static float round_to_nearest_even_float_ftz( cl_ulong p, int exponent )
     extern int gCheckTininessBeforeRounding;
-    union {
-        cl_uint u;
-        cl_float d;
-    } u;
+    union{ cl_uint u; cl_float d;} u;
     int shift = 8 + 32;
     // If mantissa is zero, return 0.0f
     if (p == 0) return 0.0f;
     // edges
-    if (exponent > 127)
+    if( exponent > 127 )
-        volatile float r = exponent * CL_FLT_MAX; // signal overflow
+        volatile float r = exponent * CL_FLT_MAX;       // signal overflow
         // attempt to fool the compiler into not optimizing the above line away
-        if (r > CL_FLT_MAX) return INFINITY;
+        if( r > CL_FLT_MAX )
+        return INFINITY;
         return r;
     // Deal with FTZ for gCheckTininessBeforeRounding
-    if (exponent < (gCheckTininessBeforeRounding - 127)) return 0.0f;
+    if( exponent < (gCheckTininessBeforeRounding - 127) )
+        return 0.0f;
-    if (exponent
-        == -127) // only happens for machines that check tininess after rounding
-        p = (p & 1) | (p >> 1);
+    if( exponent == -127 ) // only happens for machines that check tininess after rounding
+        p = (p&1) | (p>>1);
-        p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove
-                                    // it.
+        p &= 0x7fffffffffffffffULL;     // normal: leading bit is implicit. Remove it.
     cl_ulong q = p;
     // Assemble the double (round toward zero)
-    u.u = (cl_uint)(q >> shift) | ((cl_uint)(exponent + 127) << 23);
+    u.u = (cl_uint)(q >> shift) | ((cl_uint) (exponent + 127) << 23);
     // put a representation of the residual bits into hi
-    q <<= (64 - shift);
+    q <<= (64-shift);
-    // round to nearest, ties to even  based on the unused portion of p
-    if (q > 0x8000000000000000ULL)
+    //round to nearest, ties to even  based on the unused portion of p
+    if( q > 0x8000000000000000ULL )
-    else if (q == 0x8000000000000000ULL)
+    else if( q == 0x8000000000000000ULL )
         u.u += u.u & 1U;
     // Deal with FTZ for ! gCheckTininessBeforeRounding
-    if (0 == (u.u & 0x7f800000U)) return 0.0f;
+    if( 0 == (u.u & 0x7f800000U )  )
+        return 0.0f;
     return u.d;
 // IEEE-754 round toward zero.
-static float round_toward_zero_float(cl_ulong p, int exponent)
+static float round_toward_zero_float( cl_ulong p, int exponent );
+static float round_toward_zero_float( cl_ulong p, int exponent )
-    union {
-        cl_uint u;
-        cl_float d;
-    } u;
+    union{ cl_uint u; cl_float d;} u;
     // If mantissa is zero, return 0.0f
     if (p == 0) return 0.0f;
     // edges
-    if (exponent > 127)
+    if( exponent > 127 )
-        volatile float r = exponent * CL_FLT_MAX; // signal overflow
+        volatile float r = exponent * CL_FLT_MAX;       // signal overflow
         // attempt to fool the compiler into not optimizing the above line away
-        if (r > CL_FLT_MAX) return CL_FLT_MAX;
+        if( r > CL_FLT_MAX )
+            return CL_FLT_MAX;
         return r;
-    if (exponent <= -149) return 0.0f;
+    if( exponent <= -149 )
+        return 0.0f;
-    // Figure out which bits go where
+    //Figure out which bits go where
     int shift = 8 + 32;
-    if (exponent < -126)
+    if( exponent < -126 )
-        shift -= 126 + exponent; // subnormal: shift is not 52
-        exponent = -127; //            set exponent to 0
+        shift -= 126 + exponent;                    // subnormal: shift is not 52
+        exponent = -127;                            //            set exponent to 0
-        p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove
-                                    // it.
+        p &= 0x7fffffffffffffffULL;                 // normal: leading bit is implicit. Remove it.
     // Assemble the double (round toward zero)
-    u.u = (cl_uint)(p >> shift) | ((cl_uint)(exponent + 127) << 23);
+    u.u = (cl_uint)(p >> shift) | ((cl_uint) (exponent + 127) << 23);
     return u.d;
-static float round_toward_zero_float_ftz(cl_ulong p, int exponent)
+static float round_toward_zero_float_ftz( cl_ulong p, int exponent );
+static float round_toward_zero_float_ftz( cl_ulong p, int exponent )
-    union {
-        cl_uint u;
-        cl_float d;
-    } u;
+    extern int gCheckTininessBeforeRounding;
+    union{ cl_uint u; cl_float d;} u;
     int shift = 8 + 32;
     // If mantissa is zero, return 0.0f
     if (p == 0) return 0.0f;
     // edges
-    if (exponent > 127)
+    if( exponent > 127 )
-        volatile float r = exponent * CL_FLT_MAX; // signal overflow
+        volatile float r = exponent * CL_FLT_MAX;       // signal overflow
         // attempt to fool the compiler into not optimizing the above line away
-        if (r > CL_FLT_MAX) return CL_FLT_MAX;
+        if( r > CL_FLT_MAX )
+            return CL_FLT_MAX;
         return r;
     // Deal with FTZ for gCheckTininessBeforeRounding
-    if (exponent < -126) return 0.0f;
+    if( exponent < -126 )
+        return 0.0f;
-    cl_ulong q = p &=
-        0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove it.
+    cl_ulong q = p &= 0x7fffffffffffffffULL;     // normal: leading bit is implicit. Remove it.
     // Assemble the double (round toward zero)
-    u.u = (cl_uint)(q >> shift) | ((cl_uint)(exponent + 127) << 23);
+    u.u = (cl_uint)(q >> shift) | ((cl_uint) (exponent + 127) << 23);
     // put a representation of the residual bits into hi
-    q <<= (64 - shift);
+    q <<= (64-shift);
     return u.d;
 // Subtract two significands.
-static inline void sub64(cl_ulong *c, cl_ulong p, cl_uint *signC, int *expC)
+static inline void sub64( cl_ulong *c, cl_ulong p, cl_uint *signC, int *expC );
+static inline void sub64( cl_ulong *c, cl_ulong p, cl_uint *signC, int *expC )
     cl_ulong carry;
-    p = sub_carry(*c, p, &carry);
+    p = sub_carry( *c, p, &carry );
-    if (carry)
+    if( carry )
         *signC ^= 0x80000000U;
         p = -p;
     // normalize
-    if (p)
+    if( p )
         int shift = 32;
         cl_ulong test = 1ULL << 32;
-        while (0 == (p & 0x8000000000000000ULL))
+        while( 0 == (p & 0x8000000000000000ULL))
-            if (p < test)
+            if( p < test )
                 p <<= shift;
                 *expC = *expC - shift;
@@ -449,60 +458,49 @@
         // zero result.
         *expC = -200;
-        *signC =
-            0; // IEEE rules say a - a = +0 for all rounding modes except -inf
+        *signC = 0;     // IEEE rules say a - a = +0 for all rounding modes except -inf
     *c = p;
-float reference_fma(float a, float b, float c, int shouldFlush)
+float reference_fma( float a, float b, float c, int shouldFlush )
     static const cl_uint kMSB = 0x80000000U;
     // Make bits accessible
-    union {
-        cl_uint u;
-        cl_float d;
-    } ua;
-    ua.d = a;
-    union {
-        cl_uint u;
-        cl_float d;
-    } ub;
-    ub.d = b;
-    union {
-        cl_uint u;
-        cl_float d;
-    } uc;
-    uc.d = c;
+    union{ cl_uint u; cl_float d; } ua; ua.d = a;
+    union{ cl_uint u; cl_float d; } ub; ub.d = b;
+    union{ cl_uint u; cl_float d; } uc; uc.d = c;
     // deal with Nans, infinities and zeros
-    if (isnan(a) || isnan(b) || isnan(c) || isinf(a) || isinf(b) || isinf(c)
-        || 0 == (ua.u & ~kMSB) || // a == 0, defeat host FTZ behavior
-        0 == (ub.u & ~kMSB) || // b == 0, defeat host FTZ behavior
-        0 == (uc.u & ~kMSB)) // c == 0, defeat host FTZ behavior
+    if( isnan( a ) || isnan( b ) || isnan(c)    ||
+        isinf( a ) || isinf( b ) || isinf(c)    ||
+        0 == ( ua.u & ~kMSB)                ||  // a == 0, defeat host FTZ behavior
+        0 == ( ub.u & ~kMSB)                ||  // b == 0, defeat host FTZ behavior
+        0 == ( uc.u & ~kMSB)                )   // c == 0, defeat host FTZ behavior
         FPU_mode_type oldMode;
         RoundingMode oldRoundMode = kRoundToNearestEven;
-        if (isinf(c) && !isinf(a) && !isinf(b)) return (c + a) + b;
+        if( isinf( c ) && !isinf(a) && !isinf(b) )
+            return (c + a) + b;
-        if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
+        if (gIsInRTZMode)
+            oldRoundMode = set_round(kRoundTowardZero, kfloat);
-        memset(&oldMode, 0, sizeof(oldMode));
-        if (shouldFlush) ForceFTZ(&oldMode);
+        memset( &oldMode, 0, sizeof( oldMode ) );
+        if( shouldFlush )
+            ForceFTZ( &oldMode );
-        a = (float)reference_multiply(
-            a, b); // some risk that the compiler will insert a non-compliant
-                   // fma here on some platforms.
-        a = (float)reference_add(
-            a,
-            c); // We use STDC FP_CONTRACT OFF above to attempt to defeat that.
+        a = (float) reference_multiply( a, b );    // some risk that the compiler will insert a non-compliant fma here on some platforms.
+        a = (float) reference_add( a, c );           // We use STDC FP_CONTRACT OFF above to attempt to defeat that.
-        if (shouldFlush) RestoreFPState(&oldMode);
+        if( shouldFlush )
+            RestoreFPState( &oldMode );
-        if (gIsInRTZMode) set_round(oldRoundMode, kfloat);
+        if( gIsInRTZMode )
+            set_round(oldRoundMode, kfloat);
         return a;
@@ -510,70 +508,67 @@
     //   exponent is a standard unbiased signed integer
     //   mantissa is a cl_uint, with leading non-zero bit positioned at the MSB
     cl_uint mantA, mantB, mantC;
-    int expA = extractf(a, &mantA);
-    int expB = extractf(b, &mantB);
-    int expC = extractf(c, &mantC);
-    cl_uint signC = uc.u & kMSB; // We'll need the sign bit of C later to decide
-                                 // if we are adding or subtracting
+    int expA = extractf( a, &mantA );
+    int expB = extractf( b, &mantB );
+    int expC = extractf( c, &mantC );
+    cl_uint signC = uc.u & kMSB;                // We'll need the sign bit of C later to decide if we are adding or subtracting
-    // exact product of A and B
+// exact product of A and B
     int exponent = expA + expB;
     cl_uint sign = (ua.u ^ ub.u) & kMSB;
-    cl_ulong product = (cl_ulong)mantA * (cl_ulong)mantB;
+    cl_ulong product = (cl_ulong) mantA * (cl_ulong) mantB;
     // renormalize -- 1.m * 1.n yields a number between 1.0 and 3.99999..
-    //  The MSB might not be set. If so, fix that. Otherwise, reflect the fact
-    //  that we got another power of two from the multiplication
-    if (0 == (0x8000000000000000ULL & product))
+    //  The MSB might not be set. If so, fix that. Otherwise, reflect the fact that we got another power of two from the multiplication
+    if( 0 == (0x8000000000000000ULL & product) )
         product <<= 1;
-        exponent++; // 2**31 * 2**31 gives 2**62. If the MSB was set, then our
-                    // exponent increased.
+        exponent++;         // 2**31 * 2**31 gives 2**62. If the MSB was set, then our exponent increased.
-    // infinite precision add
-    cl_ulong addend = (cl_ulong)mantC << 32;
-    if (exponent >= expC)
+//infinite precision add
+    cl_ulong addend = (cl_ulong) mantC << 32;
+    if( exponent >= expC )
         // Shift C relative to the product so that their exponents match
-        if (exponent > expC) shift_right_sticky_64(&addend, exponent - expC);
+        if( exponent > expC )
+            shift_right_sticky_64( &addend, exponent - expC );
         // Add
-        if (sign ^ signC)
-            sub64(&product, addend, &sign, &exponent);
+        if( sign ^ signC )
+            sub64( &product, addend, &sign, &exponent );
-            add64(&product, addend, &exponent);
+            add64( &product, addend, &exponent );
         // Shift the product relative to C so that their exponents match
-        shift_right_sticky_64(&product, expC - exponent);
+        shift_right_sticky_64( &product, expC - exponent );
         // add
-        if (sign ^ signC)
-            sub64(&addend, product, &signC, &expC);
+        if( sign ^ signC )
+            sub64( &addend, product, &signC, &expC );
-            add64(&addend, product, &expC);
+            add64( &addend, product, &expC );
         product = addend;
         exponent = expC;
         sign = signC;
-    // round to IEEE result -- we do not do flushing to zero here. That part is
-    // handled manually in ternary.c.
+    // round to IEEE result -- we do not do flushing to zero here. That part is handled manually in ternary.c.
     if (gIsInRTZMode)
-        if (shouldFlush)
-            ua.d = round_toward_zero_float_ftz(product, exponent);
+        if( shouldFlush )
+            ua.d = round_toward_zero_float_ftz( product, exponent);
-            ua.d = round_toward_zero_float(product, exponent);
+            ua.d = round_toward_zero_float( product, exponent);
-        if (shouldFlush)
-            ua.d = round_to_nearest_even_float_ftz(product, exponent);
+        if( shouldFlush )
+            ua.d = round_to_nearest_even_float_ftz( product, exponent);
-            ua.d = round_to_nearest_even_float(product, exponent);
+            ua.d = round_to_nearest_even_float( product, exponent);
     // Set the sign
@@ -582,36 +577,35 @@
     return ua.d;
-double reference_relaxed_exp10(double x) { return reference_exp10(x); }
-double reference_exp10(double x)
+double reference_relaxed_exp10( double x)
-    return reference_exp2(x * HEX_DBL(+, 1, a934f0979a371, +, 1));
+  return reference_exp10(x);
+double reference_exp10( double x) {   return reference_exp2( x * HEX_DBL( +, 1, a934f0979a371, +, 1 ) );    }
-int reference_ilogb(double x)
+int   reference_ilogb( double x )
     extern int gDeviceILogb0, gDeviceILogbNaN;
-    union {
-        cl_double f;
-        cl_ulong u;
-    } u;
+    union { cl_double f; cl_ulong u;} u;
-    u.f = (float)x;
-    cl_int exponent = (cl_int)(u.u >> 52) & 0x7ff;
-    if (exponent == 0x7ff)
+    u.f = (float) x;
+    cl_int exponent = (cl_int) (u.u >> 52) & 0x7ff;
+    if( exponent == 0x7ff )
-        if (u.u & 0x000fffffffffffffULL) return gDeviceILogbNaN;
+        if( u.u & 0x000fffffffffffffULL )
+            return gDeviceILogbNaN;
         return CL_INT_MAX;
-    if (exponent == 0)
-    { // deal with denormals
-        u.f = x * HEX_DBL(+, 1, 0, +, 64);
-        exponent = (cl_int)(u.u >> 52) & 0x7ff;
-        if (exponent == 0) return gDeviceILogb0;
+    if( exponent == 0 )
+    {   // deal with denormals
+        u.f = x * HEX_DBL( +, 1, 0, +, 64 );
+        exponent = (cl_int) (u.u >> 52) & 0x7ff;
+        if( exponent == 0 )
+            return gDeviceILogb0;
         return exponent - (1023 + 64);
@@ -619,205 +613,218 @@
     return exponent - 1023;
-double reference_nan(cl_uint x)
+double reference_nan( cl_uint x )
-    union {
-        cl_uint u;
-        cl_float f;
-    } u;
+    union{ cl_uint u; cl_float f; }u;
     u.u = x | 0x7fc00000U;
-    return (double)u.f;
+    return (double) u.f;
-double reference_maxmag(double x, double y)
+double reference_maxmag( double x, double y )
     double fabsx = fabs(x);
     double fabsy = fabs(y);
-    if (fabsx < fabsy) return y;
+    if( fabsx < fabsy )
+        return y;
-    if (fabsy < fabsx) return x;
+    if( fabsy < fabsx )
+        return x;
-    return reference_fmax(x, y);
+    return reference_fmax( x, y );
-double reference_minmag(double x, double y)
+double reference_minmag( double x, double y )
     double fabsx = fabs(x);
     double fabsy = fabs(y);
-    if (fabsx > fabsy) return y;
+    if( fabsx > fabsy )
+        return y;
-    if (fabsy > fabsx) return x;
+    if( fabsy > fabsx )
+        return x;
-    return reference_fmin(x, y);
+    return reference_fmin( x, y );
-double reference_relaxed_mad(double a, double b, double c)
+//double my_nextafter( double x, double y ){  return (double) nextafterf( (float) x, (float) y ); }
+double reference_relaxed_mad( double a, double b, double c)
-    return ((float)a) * ((float)b) + (float)c;
+  return ((float) a )* ((float) b) + (float) c;
-double reference_mad(double a, double b, double c) { return a * b + c; }
+double reference_mad( double a, double b, double c )
+    return a * b + c;
-double reference_recip(double x) { return 1.0 / x; }
-double reference_rootn(double x, int i)
+double reference_recip( double x) {   return 1.0 / x; }
+double reference_rootn( double x, int i )
-    // rootn ( x, 0 )  returns a NaN.
-    if (0 == i) return cl_make_nan();
+    //rootn ( x, 0 )  returns a NaN.
+    if( 0 == i )
+        return cl_make_nan();
-    // rootn ( x, n )  returns a NaN for x < 0 and n is even.
-    if (x < 0 && 0 == (i & 1)) return cl_make_nan();
+    //rootn ( x, n )  returns a NaN for x < 0 and n is even.
+    if( x < 0 && 0 == (i&1) )
+        return cl_make_nan();
-    if (x == 0.0)
+    if( x == 0.0 )
-        switch (i & 0x80000001)
+        switch( i & 0x80000001 )
-            // rootn ( +-0,  n ) is +0 for even n > 0.
-            case 0: return 0.0f;
+            //rootn ( +-0,  n ) is +0 for even n > 0.
+            case 0:
+                return 0.0f;
-            // rootn ( +-0,  n ) is +-0 for odd n > 0.
-            case 1: return x;
+            //rootn ( +-0,  n ) is +-0 for odd n > 0.
+            case 1:
+                return x;
-            // rootn ( +-0,  n ) is +inf for even n < 0.
-            case 0x80000000: return INFINITY;
+            //rootn ( +-0,  n ) is +inf for even n < 0.
+            case 0x80000000:
+                return INFINITY;
-            // rootn ( +-0,  n ) is +-inf for odd n < 0.
-            case 0x80000001: return copysign(INFINITY, x);
+            //rootn ( +-0,  n ) is +-inf for odd n < 0.
+            case 0x80000001:
+                return copysign(INFINITY, x);
     double sign = x;
     x = reference_fabs(x);
-    x = reference_exp2(reference_log2(x) / (double)i);
-    return reference_copysignd(x, sign);
+    x = reference_exp2( reference_log2(x) / (double) i );
+    return reference_copysignd( x, sign );
-double reference_rsqrt(double x) { return 1.0 / reference_sqrt(x); }
-double reference_sinpi(double x)
+double reference_rsqrt( double x) {   return 1.0 / reference_sqrt(x);   }
+//double reference_sincos( double x, double *c ){ *c = cos(x); return sin(x); }
+double reference_sinpi( double x)
     double r = reduce1(x);
     // reduce to [-0.5, 0.5]
-    if (r < -0.5)
+    if( r < -0.5 )
         r = -1 - r;
-    else if (r > 0.5)
+    else if ( r > 0.5 )
         r = 1 - r;
     // sinPi zeros have the same sign as x
-    if (r == 0.0) return reference_copysignd(0.0, x);
+    if( r == 0.0 )
+        return reference_copysignd(0.0, x);
-    return reference_sin(r * M_PI);
+    return reference_sin( r * M_PI );
-double reference_relaxed_sinpi(double x) { return reference_sinpi(x); }
-double reference_tanpi(double x)
+double reference_tanpi( double x)
     // set aside the sign  (allows us to preserve sign of -0)
-    double sign = reference_copysignd(1.0, x);
+    double sign = reference_copysignd( 1.0, x);
     double z = reference_fabs(x);
     // if big and even  -- caution: only works if x only has single precision
-    if (z >= HEX_DBL(+, 1, 0, +, 24))
+    if( z >= HEX_DBL( +, 1, 0, +, 24 ) )
-        if (z == INFINITY) return x - x; // nan
+        if( z == INFINITY )
+            return x - x;       // nan
-        return reference_copysignd(
-            0.0, x); // tanpi ( n ) is copysign( 0.0, n)  for even integers n.
+        return reference_copysignd( 0.0, x);   // tanpi ( n ) is copysign( 0.0, n)  for even integers n.
     // reduce to the range [ -0.5, 0.5 ]
-    double nearest = reference_rint(z); // round to nearest even places n + 0.5
-                                        // values in the right place for us
-    int i = (int)nearest; // test above against 0x1.0p24 avoids overflow here
+    double nearest = reference_rint( z );     // round to nearest even places n + 0.5 values in the right place for us
+    int i = (int) nearest;          // test above against 0x1.0p24 avoids overflow here
     z -= nearest;
-    // correction for odd integer x for the right sign of zero
-    if ((i & 1) && z == 0.0) sign = -sign;
+    //correction for odd integer x for the right sign of zero
+    if( (i&1) && z == 0.0 )
+        sign = -sign;
     // track changes to the sign
-    sign *= reference_copysignd(1.0, z); // really should just be an xor
-    z = reference_fabs(z); // remove the sign again
+    sign *= reference_copysignd(1.0, z);       // really should just be an xor
+    z = reference_fabs(z);                    // remove the sign again
     // reduce once more
-    // If we don't do this, rounding error in z * M_PI will cause us not to
-    // return infinities properly
-    if (z > 0.25)
+    // If we don't do this, rounding error in z * M_PI will cause us not to return infinities properly
+    if( z > 0.25 )
         z = 0.5 - z;
-        return sign
-            / reference_tan(z * M_PI); // use system tan to get the right result
+        return sign / reference_tan( z * M_PI );      // use system tan to get the right result
-    return sign
-        * reference_tan(z * M_PI); // use system tan to get the right result
+    return sign * reference_tan( z * M_PI );          // use system tan to get the right result
-double reference_pown(double x, int i) { return reference_pow(x, (double)i); }
-double reference_powr(double x, double y)
+double reference_pown( double x, int i) { return reference_pow( x, (double) i ); }
+double reference_powr( double x, double y )
-    // powr ( x, y ) returns NaN for x < 0.
-    if (x < 0.0) return cl_make_nan();
+    //powr ( x, y ) returns NaN for x < 0.
+    if( x < 0.0 )
+        return cl_make_nan();
-    // powr ( x, NaN ) returns the NaN for x >= 0.
-    // powr ( NaN, y ) returns the NaN.
-    if (isnan(x) || isnan(y))
-        return x + y; // Note: behavior different here than for pow(1,NaN),
-                      // pow(NaN, 0)
+    //powr ( x, NaN ) returns the NaN for x >= 0.
+    //powr ( NaN, y ) returns the NaN.
+    if( isnan(x) || isnan(y) )
+        return x + y;       // Note: behavior different here than for pow(1,NaN), pow(NaN, 0)
-    if (x == 1.0)
+    if( x == 1.0 )
-        // powr ( +1, +-inf ) returns NaN.
-        if (reference_fabs(y) == INFINITY) return cl_make_nan();
+        //powr ( +1, +-inf ) returns NaN.
+        if( reference_fabs(y) == INFINITY )
+            return cl_make_nan();
-        // powr ( +1, y ) is 1 for finite y.    (NaN handled above)
+        //powr ( +1, y ) is 1 for finite y.    (NaN handled above)
         return 1.0;
-    if (y == 0.0)
+    if( y == 0.0 )
-        // powr ( +inf, +-0 ) returns NaN.
-        // powr ( +-0, +-0 ) returns NaN.
-        if (x == 0.0 || x == INFINITY) return cl_make_nan();
+        //powr ( +inf, +-0 ) returns NaN.
+        //powr ( +-0, +-0 ) returns NaN.
+        if( x == 0.0 || x == INFINITY )
+            return cl_make_nan();
-        // powr ( x, +-0 ) is 1 for finite x > 0.  (x <= 0, NaN, INF already
-        // handled above)
+        //powr ( x, +-0 ) is 1 for finite x > 0.  (x <= 0, NaN, INF already handled above)
         return 1.0;
-    if (x == 0.0)
+    if( x == 0.0 )
-        // powr ( +-0, -inf) is +inf.
-        // powr ( +-0, y ) is +inf for finite y < 0.
-        if (y < 0.0) return INFINITY;
+        //powr ( +-0, -inf) is +inf.
+        //powr ( +-0, y ) is +inf for finite y < 0.
+        if( y < 0.0 )
+            return INFINITY;
-        // powr ( +-0, y ) is +0 for y > 0.    (NaN, y==0 handled above)
+        //powr ( +-0, y ) is +0 for y > 0.    (NaN, y==0 handled above)
         return 0.0;
     // x = +inf
-    if (isinf(x))
+    if( isinf(x) )
-        if (y < 0) return 0;
+        if( y < 0 )
+            return 0;
         return INFINITY;
     double fabsx = reference_fabs(x);
     double fabsy = reference_fabs(y);
-    // y = +-inf cases
-    if (isinf(fabsy))
+    //y = +-inf cases
+    if( isinf(fabsy) )
-        if (y < 0)
+        if( y < 0 )
-            if (fabsx < 1) return INFINITY;
+            if( fabsx < 1 )
+                return INFINITY;
             return 0;
-        if (fabsx < 1) return 0;
+        if( fabsx < 1 )
+            return 0;
         return INFINITY;
@@ -829,209 +836,169 @@
     return result;
-double reference_fract(double x, double *ip)
+double reference_fract( double x, double *ip )
-    if (isnan(x))
-    {
+    if(isnan(x)) {
         *ip = cl_make_nan();
         return cl_make_nan();
     float i;
-    float f = modff((float)x, &i);
-    if (f < 0.0)
+    float f = modff((float) x, &i );
+    if( f < 0.0 )
         f = 1.0f + f;
         i -= 1.0f;
-        if (f == 1.0f) f = HEX_FLT(+, 1, fffffe, -, 1);
+        if( f == 1.0f )
+            f = HEX_FLT( +, 1, fffffe, -, 1 );
     *ip = i;
     return f;
-double reference_add(double x, double y)
+//double my_fdim( double x, double y){ return fdimf( (float) x, (float) y ); }
+double reference_add( double x, double y )
-    volatile float a = (float)x;
-    volatile float b = (float)y;
+    volatile float a = (float) x;
+    volatile float b = (float) y;
-#if defined(__SSE__)                                                           \
-    || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
+#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64)))
     // defeat x87
-    __m128 va = _mm_set_ss((float)a);
-    __m128 vb = _mm_set_ss((float)b);
-    va = _mm_add_ss(va, vb);
-    _mm_store_ss((float *)&a, va);
+    __m128 va = _mm_set_ss( (float) a );
+    __m128 vb = _mm_set_ss( (float) b );
+    va = _mm_add_ss( va, vb );
+    _mm_store_ss( (float*) &a, va );
 #elif defined(__PPC__)
-    // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
-    // denorm's to zero. As such, the reference add with FTZ must be emulated in
-    // sw.
-    if (fpu_control & _FPU_MASK_NI)
-    {
-        union {
-            cl_uint u;
-            cl_float d;
-        } ua;
-        ua.d = a;
-        union {
-            cl_uint u;
-            cl_float d;
-        } ub;
-        ub.d = b;
-        cl_uint mantA, mantB;
-        cl_ulong addendA, addendB, sum;
-        int expA = extractf(a, &mantA);
-        int expB = extractf(b, &mantB);
-        cl_uint signA = ua.u & 0x80000000U;
-        cl_uint signB = ub.u & 0x80000000U;
+    // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes denorm's to zero.
+    // As such, the reference add with FTZ must be emulated in sw.
+    if (fpu_control & _FPU_MASK_NI) {
+      union{ cl_uint u; cl_float d; } ua; ua.d = a;
+      union{ cl_uint u; cl_float d; } ub; ub.d = b;
+      cl_uint mantA, mantB;
+      cl_ulong addendA, addendB, sum;
+      int expA = extractf( a, &mantA );
+      int expB = extractf( b, &mantB );
+      cl_uint signA = ua.u & 0x80000000U;
+      cl_uint signB = ub.u & 0x80000000U;
-        // Force matching exponents if an operand is 0
-        if (a == 0.0f)
-        {
-            expA = expB;
-        }
-        else if (b == 0.0f)
-        {
-            expB = expA;
-        }
+      // Force matching exponents if an operand is 0
+      if (a == 0.0f) {
+    expA = expB;
+      } else if (b == 0.0f) {
+    expB = expA;
+      }
-        addendA = (cl_ulong)mantA << 32;
-        addendB = (cl_ulong)mantB << 32;
+      addendA = (cl_ulong)mantA << 32;
+      addendB = (cl_ulong)mantB << 32;
-        if (expA >= expB)
-        {
-            // Shift B relative to the A so that their exponents match
-            if (expA > expB) shift_right_sticky_64(&addendB, expA - expB);
+      if (expA >= expB) {
+        // Shift B relative to the A so that their exponents match
+        if( expA > expB )
+      shift_right_sticky_64( &addendB, expA - expB );
-            // add
-            if (signA ^ signB)
-                sub64(&addendA, addendB, &signA, &expA);
-            else
-                add64(&addendA, addendB, &expA);
-        }
+        // add
+        if( signA ^ signB )
+      sub64( &addendA, addendB, &signA, &expA );
-        {
-            // Shift the A relative to B so that their exponents match
-            shift_right_sticky_64(&addendA, expB - expA);
+      add64( &addendA, addendB, &expA );
+      } else  {
+        // Shift the A relative to B so that their exponents match
+        shift_right_sticky_64( &addendA, expB - expA );
-            // add
-            if (signA ^ signB)
-                sub64(&addendB, addendA, &signB, &expB);
-            else
-                add64(&addendB, addendA, &expB);
-            addendA = addendB;
-            expA = expB;
-            signA = signB;
-        }
-        // round to IEEE result
-        if (gIsInRTZMode)
-        {
-            ua.d = round_toward_zero_float_ftz(addendA, expA);
-        }
+        // add
+        if( signA ^ signB )
+      sub64( &addendB, addendA, &signB, &expB );
-        {
-            ua.d = round_to_nearest_even_float_ftz(addendA, expA);
-        }
-        // Set the sign
-        ua.u |= signA;
-        a = ua.d;
-    }
-    else
-    {
-        a += b;
+      add64( &addendB, addendA, &expB );
+        addendA = addendB;
+        expA = expB;
+        signA = signB;
+      }
+      // round to IEEE result
+      if (gIsInRTZMode)    {
+    ua.d = round_toward_zero_float_ftz( addendA, expA );
+      } else {
+    ua.d = round_to_nearest_even_float_ftz( addendA, expA );
+      }
+      // Set the sign
+      ua.u |= signA;
+      a = ua.d;
+    } else {
+      a += b;
     a += b;
-    return (double)a;
+    return (double) a;
+ }
-double reference_subtract(double x, double y)
+double reference_subtract( double x, double y )
-    volatile float a = (float)x;
-    volatile float b = (float)y;
-#if defined(__SSE__)                                                           \
-    || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
+    volatile float a = (float) x;
+    volatile float b = (float) y;
+#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64)))
     // defeat x87
-    __m128 va = _mm_set_ss((float)a);
-    __m128 vb = _mm_set_ss((float)b);
-    va = _mm_sub_ss(va, vb);
-    _mm_store_ss((float *)&a, va);
+    __m128 va = _mm_set_ss( (float) a );
+    __m128 vb = _mm_set_ss( (float) b );
+    va = _mm_sub_ss( va, vb );
+    _mm_store_ss( (float*) &a, va );
     a -= b;
     return a;
-double reference_multiply(double x, double y)
+//double reference_divide( double x, double y ){ return (float) x / (float) y; }
+double reference_multiply( double x, double y)
-    volatile float a = (float)x;
-    volatile float b = (float)y;
-#if defined(__SSE__)                                                           \
-    || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
+    volatile float a = (float) x;
+    volatile float b = (float) y;
+#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64)))
     // defeat x87
-    __m128 va = _mm_set_ss((float)a);
-    __m128 vb = _mm_set_ss((float)b);
-    va = _mm_mul_ss(va, vb);
-    _mm_store_ss((float *)&a, va);
+    __m128 va = _mm_set_ss( (float) a );
+    __m128 vb = _mm_set_ss( (float) b );
+    va = _mm_mul_ss( va, vb );
+    _mm_store_ss( (float*) &a, va );
 #elif defined(__PPC__)
-    // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
-    // denorm's to zero. As such, the reference multiply with FTZ must be
-    // emulated in sw.
-    if (fpu_control & _FPU_MASK_NI)
-    {
-        // extract exponent and mantissa
-        //   exponent is a standard unbiased signed integer
-        //   mantissa is a cl_uint, with leading non-zero bit positioned at the
-        //   MSB
-        union {
-            cl_uint u;
-            cl_float d;
-        } ua;
-        ua.d = a;
-        union {
-            cl_uint u;
-            cl_float d;
-        } ub;
-        ub.d = b;
-        cl_uint mantA, mantB;
-        int expA = extractf(a, &mantA);
-        int expB = extractf(b, &mantB);
+    // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes denorm's to zero.
+    // As such, the reference multiply with FTZ must be emulated in sw.
+    if (fpu_control & _FPU_MASK_NI) {
+      // extract exponent and mantissa
+      //   exponent is a standard unbiased signed integer
+      //   mantissa is a cl_uint, with leading non-zero bit positioned at the MSB
+      union{ cl_uint u; cl_float d; } ua; ua.d = a;
+      union{ cl_uint u; cl_float d; } ub; ub.d = b;
+      cl_uint mantA, mantB;
+      int expA = extractf( a, &mantA );
+      int expB = extractf( b, &mantB );
-        // exact product of A and B
-        int exponent = expA + expB;
-        cl_uint sign = (ua.u ^ ub.u) & 0x80000000U;
-        cl_ulong product = (cl_ulong)mantA * (cl_ulong)mantB;
+      // exact product of A and B
+      int exponent = expA + expB;
+      cl_uint sign = (ua.u ^ ub.u) & 0x80000000U;
+      cl_ulong product = (cl_ulong) mantA * (cl_ulong) mantB;
-        // renormalize -- 1.m * 1.n yields a number between 1.0 and 3.99999..
-        //  The MSB might not be set. If so, fix that. Otherwise, reflect the
-        //  fact that we got another power of two from the multiplication
-        if (0 == (0x8000000000000000ULL & product))
-            product <<= 1;
-        else
-            exponent++; // 2**31 * 2**31 gives 2**62. If the MSB was set, then
-                        // our exponent increased.
+      // renormalize -- 1.m * 1.n yields a number between 1.0 and 3.99999..
+      //  The MSB might not be set. If so, fix that. Otherwise, reflect the fact that we got another power of two from the multiplication
+      if( 0 == (0x8000000000000000ULL & product) )
+        product <<= 1;
+      else
+        exponent++;         // 2**31 * 2**31 gives 2**62. If the MSB was set, then our exponent increased.
-        // round to IEEE result -- we do not do flushing to zero here. That part
-        // is handled manually in ternary.c.
-        if (gIsInRTZMode)
-        {
-            ua.d = round_toward_zero_float_ftz(product, exponent);
-        }
-        else
-        {
-            ua.d = round_to_nearest_even_float_ftz(product, exponent);
-        }
-        // Set the sign
-        ua.u |= sign;
-        a = ua.d;
-    }
-    else
-    {
-        a *= b;
+      // round to IEEE result -- we do not do flushing to zero here. That part is handled manually in ternary.c.
+      if (gIsInRTZMode)    {
+    ua.d = round_toward_zero_float_ftz( product, exponent);
+      } else {
+    ua.d = round_to_nearest_even_float_ftz( product, exponent);
+      }
+      // Set the sign
+      ua.u |= sign;
+      a = ua.d;
+    } else {
+      a *= b;
     a *= b;
@@ -1039,7 +1006,19 @@
     return a;
-double reference_lgamma_r(double x, int *signp)
+/*double my_remquo( double x, double y, int *iptr )
+    if( isnan(x) || isnan(y) ||
+        fabs(x) == INFINITY  ||
+        y == 0.0 )
+    {
+        *iptr = 0;
+        return NAN;
+    }
+    return (double) remquof( (float) x, (float) y, iptr );
+double reference_lgamma_r( double x, int *signp )
     // This is not currently tested
     *signp = 0;
@@ -1047,276 +1026,264 @@
-int reference_isequal(double x, double y) { return x == y; }
-int reference_isfinite(double x) { return 0 != isfinite(x); }
-int reference_isgreater(double x, double y) { return x > y; }
-int reference_isgreaterequal(double x, double y) { return x >= y; }
-int reference_isinf(double x) { return 0 != isinf(x); }
-int reference_isless(double x, double y) { return x < y; }
-int reference_islessequal(double x, double y) { return x <= y; }
-int reference_islessgreater(double x, double y)
-    return 0 != islessgreater(x, y);
-int reference_isnan(double x) { return 0 != isnan(x); }
-int reference_isnormal(double x) { return 0 != isnormal((float)x); }
-int reference_isnotequal(double x, double y) { return x != y; }
-int reference_isordered(double x, double y) { return x == x && y == y; }
-int reference_isunordered(double x, double y) { return isnan(x) || isnan(y); }
-int reference_signbit(float x) { return 0 != signbit(x); }
+int reference_isequal( double x, double y ){ return x == y; }
+int reference_isfinite( double x ){ return 0 != isfinite(x); }
+int reference_isgreater( double x, double y ){ return x > y; }
+int reference_isgreaterequal( double x, double y ){ return x >= y; }
+int reference_isinf( double x ){ return 0 != isinf(x); }
+int reference_isless( double x, double y ){ return x < y; }
+int reference_islessequal( double x, double y ){ return x <= y; }
+int reference_islessgreater( double x, double y ){  return 0 != islessgreater( x, y ); }
+int reference_isnan( double x ){ return 0 != isnan( x ); }
+int reference_isnormal( double x ){ return 0 != isnormal( (float) x ); }
+int reference_isnotequal( double x, double y ){ return x != y; }
+int reference_isordered( double x, double y){ return x == x && y == y; }
+int reference_isunordered( double x, double y ){ return isnan(x) || isnan( y ); }
+int reference_signbit( float x ){ return 0 != signbit( x ); }
 #if 1 // defined( _MSC_VER )
-// Missing functions for win32
+//Missing functions for win32
-float reference_copysign(float x, float y)
+float reference_copysign( float x, float y )
-    union {
-        float f;
-        cl_uint u;
-    } ux, uy;
-    ux.f = x;
-    uy.f = y;
+    union { float f; cl_uint u;} ux, uy;
+    ux.f = x; uy.f = y;
     ux.u &= 0x7fffffffU;
     ux.u |= uy.u & 0x80000000U;
     return ux.f;
-double reference_copysignd(double x, double y)
+double reference_copysignd( double x, double y )
-    union {
-        double f;
-        cl_ulong u;
-    } ux, uy;
-    ux.f = x;
-    uy.f = y;
+    union { double f; cl_ulong u;} ux, uy;
+    ux.f = x; uy.f = y;
     ux.u &= 0x7fffffffffffffffULL;
     ux.u |= uy.u & 0x8000000000000000ULL;
     return ux.f;
-double reference_round(double x)
+double reference_round( double x )
     double absx = reference_fabs(x);
-    if (absx < 0.5) return reference_copysignd(0.0, x);
+    if( absx < 0.5 )
+        return reference_copysignd( 0.0, x );
-    if (absx < HEX_DBL(+, 1, 0, +, 53))
-        x = reference_trunc(x + reference_copysignd(0.5, x));
+    if( absx < HEX_DBL( +, 1, 0, +, 53 ) )
+        x = reference_trunc( x + reference_copysignd( 0.5, x ) );
     return x;
-double reference_trunc(double x)
+double reference_trunc( double x )
-    if (fabs(x) < HEX_DBL(+, 1, 0, +, 53))
+    if( fabs(x) < HEX_DBL( +, 1, 0, +, 53 ) )
-        cl_long l = (cl_long)x;
+        cl_long l = (cl_long) x;
-        return reference_copysignd((double)l, x);
+        return reference_copysignd( (double) l, x );
     return x;
 #ifndef FP_ILOGB0
-#define FP_ILOGB0 INT_MIN
+    #define FP_ILOGB0   INT_MIN
 #ifndef FP_ILOGBNAN
+    #define FP_ILOGBNAN   INT_MAX
-double reference_cbrt(double x)
-    return reference_copysignd(reference_pow(reference_fabs(x), 1.0 / 3.0), x);
-double reference_rint(double x)
+double reference_cbrt(double x){ return reference_copysignd( reference_pow( reference_fabs(x), 1.0/3.0 ), x ); }
+double reference_scalbn(double x, int i)
+{ // suitable for checking single precision scalbnf only
+    if( i > 300 )
+        return copysign( INFINITY, x);
+    if( i < -300 )
+        return copysign( 0.0, x);
+    union{ cl_ulong u; double d;} u;
+    u.u = ((cl_ulong) i + 1023) << 52;
+    return x * u.d;
+double reference_rint( double x )
-    if (reference_fabs(x) < HEX_DBL(+, 1, 0, +, 52))
+    if( reference_fabs(x) < HEX_DBL( +, 1, 0, +, 52 )  )
-        double magic = reference_copysignd(HEX_DBL(+, 1, 0, +, 52), x);
+        double magic = reference_copysignd( HEX_DBL( +, 1, 0, +, 52 ), x );
         double rounded = (x + magic) - magic;
-        x = reference_copysignd(rounded, x);
+        x = reference_copysignd( rounded, x );
     return x;
-double reference_acosh(double x)
+double reference_acosh( double x )
 { // not full precision. Sufficient precision to cover float
-    if (isnan(x)) return x + x;
+    if( isnan(x) )
+        return x + x;
-    if (x < 1.0) return cl_make_nan();
+    if( x < 1.0 )
+        return cl_make_nan();
-    return reference_log(x + reference_sqrt(x + 1) * reference_sqrt(x - 1));
+    return reference_log( x + reference_sqrt(x + 1) * reference_sqrt(x-1) );
-double reference_asinh(double x)
+double reference_asinh( double x )
-    /*
-     * ====================================================
-     * This function is from fdlibm:
-     *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
-     *
-     * Developed at SunSoft, a Sun Microsystems, Inc. business.
-     * Permission to use, copy, modify, and distribute this
-     * software is freely granted, provided that this notice
-     * is preserved.
-     * ====================================================
-     */
-    if (isnan(x) || isinf(x)) return x + x;
+ * ====================================================
+ * This function is from fdlibm:
+ *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+    if( isnan(x) || isinf(x) )
+        return x + x;
     double absx = reference_fabs(x);
-    if (absx < HEX_DBL(+, 1, 0, -, 28)) return x;
+    if( absx < HEX_DBL( +, 1, 0, -, 28 ) )
+        return x;
     double sign = reference_copysignd(1.0, x);
-    if (absx > HEX_DBL(+, 1, 0, +, 28))
-        return sign
-            * (reference_log(absx)
-               + 0.693147180559945309417232121458176568); // log(2)
+    if( absx > HEX_DBL( +, 1, 0, +, 28 ) )
+        return sign * (reference_log( absx ) + 0.693147180559945309417232121458176568);    // log(2)
-    if (absx > 2.0)
-        return sign
-            * reference_log(2.0 * absx
-                            + 1.0 / (reference_sqrt(x * x + 1.0) + absx));
+    if( absx > 2.0 )
+        return sign * reference_log( 2.0 * absx + 1.0 / (reference_sqrt( x * x + 1.0 ) + absx));
-    return sign
-        * reference_log1p(absx + x * x / (1.0 + reference_sqrt(1.0 + x * x)));
+    return sign * reference_log1p( absx + x*x / (1.0 + reference_sqrt(1.0 + x*x)));
-double reference_atanh(double x)
+double reference_atanh( double x )
-    /*
-     * ====================================================
-     * This function is from fdlibm:
-     *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
-     *
-     * Developed at SunSoft, a Sun Microsystems, Inc. business.
-     * Permission to use, copy, modify, and distribute this
-     * software is freely granted, provided that this notice
-     * is preserved.
-     * ====================================================
-     */
-    if (isnan(x)) return x + x;
+ * ====================================================
+ * This function is from fdlibm:
+ *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+    if( isnan(x)  )
+        return x + x;
-    double signed_half = reference_copysignd(0.5, x);
+    double signed_half = reference_copysignd( 0.5, x );
     x = reference_fabs(x);
-    if (x > 1.0) return cl_make_nan();
+    if( x > 1.0 )
+        return cl_make_nan();
-    if (x < 0.5)
-        return signed_half * reference_log1p(2.0 * (x + x * x / (1 - x)));
+    if( x < 0.5 )
+        return signed_half * reference_log1p( 2.0 * ( x + x*x / (1-x) ) );
-    return signed_half * reference_log1p(2.0 * x / (1 - x));
+    return signed_half * reference_log1p(2.0 * x / (1-x));
-double reference_relaxed_atan(double x) { return reference_atan(x); }
+double reference_relaxed_exp2( double x )
+  return reference_exp2(x);
-double reference_relaxed_exp2(double x) { return reference_exp2(x); }
-double reference_exp2(double x)
-{ // Note: only suitable for verifying single precision. Doesn't have range of a
-  // full double exp2 implementation.
-    if (x == 0.0) return 1.0;
+double reference_exp2( double x )
+{ // Note: only suitable for verifying single precision. Doesn't have range of a full double exp2 implementation.
+    if( x == 0.0 )
+        return 1.0;
     // separate x into fractional and integer parts
-    double i = reference_rint(x); // round to nearest integer
+    double i = reference_rint( x );        // round to nearest integer
-    if (i < -150) return 0.0;
+    if( i < -150 )
+        return 0.0;
-    if (i > 129) return INFINITY;
+    if( i > 129 )
+        return INFINITY;
-    double f = x - i; // -0.5 <= f <= 0.5
+    double f = x - i;            // -0.5 <= f <= 0.5
     // find exp2(f)
     // calculate as p(f) = (exp2(f)-1)/f
     //              exp2(f) = f * p(f) + 1
     // p(f) is a minimax polynomial with error within 0x1.c1fd80f0d1ab7p-50
-    double p = 0.693147180560184539289
-        + (0.240226506955902863183
-           + (0.055504108656833424373
-              + (0.009618129212846484796
-                 + (0.001333355902958566035
-                    + (0.000154034191902497930
-                       + (0.000015252317761038105
-                          + (0.000001326283129417092
-                             + 0.000000102593187638680 * f)
-                              * f)
-                           * f)
-                        * f)
-                     * f)
-                  * f)
-               * f)
-            * f;
+    double p = 0.693147180560184539289 +
+               (0.240226506955902863183 +
+               (0.055504108656833424373 +
+               (0.009618129212846484796 +
+               (0.001333355902958566035 +
+               (0.000154034191902497930 +
+               (0.000015252317761038105 +
+               (0.000001326283129417092 + 0.000000102593187638680 * f)*f)*f)*f)*f)*f)*f)*f;
     f *= p;
     f += 1.0;
     // scale by 2 ** i
-    union {
-        cl_ulong u;
-        double d;
-    } u;
-    int exponent = (int)i + 1023;
-    u.u = (cl_ulong)exponent << 52;
+    union{ cl_ulong u; double d; } u;
+    int exponent = (int) i + 1023;
+    u.u = (cl_ulong) exponent << 52;
     return f * u.d;
-double reference_expm1(double x)
-{ // Note: only suitable for verifying single precision. Doesn't have range of a
-  // full double expm1 implementation. It is only accurate to 47 bits or less.
+double reference_expm1( double x )
+{ // Note: only suitable for verifying single precision. Doesn't have range of a full double expm1 implementation. It is only accurate to 47 bits or less.
     // early out for small numbers and NaNs
-    if (!(reference_fabs(x) > HEX_DBL(+, 1, 0, -, 24))) return x;
+    if( ! (reference_fabs(x) > HEX_DBL( +, 1, 0, -, 24 )) )
+        return x;
     // early out for large negative numbers
-    if (x < -130.0) return -1.0;
+    if( x < -130.0 )
+        return -1.0;
     // early out for large positive numbers
-    if (x > 100.0) return INFINITY;
+    if( x > 100.0 )
+        return INFINITY;
     // separate x into fractional and integer parts
-    double i = reference_rint(x); // round to nearest integer
-    double f = x - i; // -0.5 <= f <= 0.5
+    double i = reference_rint( x );        // round to nearest integer
+    double f = x - i;            // -0.5 <= f <= 0.5
     // reduce f to the range -0.0625 .. f.. 0.0625
-    int index = (int)(f * 16.0) + 8; // 0...16
+    int index = (int) (f * 16.0) + 8;       // 0...16
-    static const double reduction[17] = { -0.5,  -0.4375, -0.375, -0.3125,
-                                          -0.25, -0.1875, -0.125, -0.0625,
-                                          0.0,   +0.0625, +0.125, +0.1875,
-                                          +0.25, +0.3125, +0.375, +0.4375,
-                                          +0.5 };
+    static const double reduction[17] = { -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625,
+                                           0.0,
+                                          +0.0625, +0.125, +0.1875, +0.25, +0.3125, +0.375, +0.4375, +0.5  };
     // exponentials[i] = expm1(reduction[i])
-    static const double exponentials[17] = {
-        HEX_DBL(-, 1, 92e9a0720d3ec, -, 2),
-        HEX_DBL(-, 1, 6adb1cd9205ee, -, 2),
-        HEX_DBL(-, 1, 40373d42ce2e3, -, 2),
-        HEX_DBL(-, 1, 12d35a41ba104, -, 2),
-        HEX_DBL(-, 1, c5041854df7d4, -, 3),
-        HEX_DBL(-, 1, 5e25fb4fde211, -, 3),
-        HEX_DBL(-, 1, e14aed893eef4, -, 4),
-        HEX_DBL(-, 1, f0540438fd5c3, -, 5),
-        HEX_DBL(+, 0, 0, +, 0),
-        HEX_DBL(+, 1, 082b577d34ed8, -, 4),
-        HEX_DBL(+, 1, 10b022db7ae68, -, 3),
-        HEX_DBL(+, 1, a65c0b85ac1a9, -, 3),
-        HEX_DBL(+, 1, 22d78f0fa061a, -, 2),
-        HEX_DBL(+, 1, 77a45d8117fd5, -, 2),
-        HEX_DBL(+, 1, d1e944f6fbdaa, -, 2),
-        HEX_DBL(+, 1, 190048ef6002, -, 1),
-        HEX_DBL(+, 1, 4c2531c3c0d38, -, 1),
-    };
+    static const double exponentials[17] = {    HEX_DBL( -, 1, 92e9a0720d3ec, -, 2 ),    HEX_DBL( -, 1, 6adb1cd9205ee, -, 2 ),
+                                                HEX_DBL( -, 1, 40373d42ce2e3, -, 2 ),    HEX_DBL( -, 1, 12d35a41ba104, -, 2 ),
+                                                HEX_DBL( -, 1, c5041854df7d4, -, 3 ),    HEX_DBL( -, 1, 5e25fb4fde211, -, 3 ),
+                                                HEX_DBL( -, 1, e14aed893eef4, -, 4 ),    HEX_DBL( -, 1, f0540438fd5c3, -, 5 ),
+                                                HEX_DBL( +, 0, 0,             +, 0 ),
+                                                HEX_DBL( +, 1, 082b577d34ed8, -, 4 ),    HEX_DBL( +, 1, 10b022db7ae68, -, 3 ),
+                                                HEX_DBL( +, 1, a65c0b85ac1a9, -, 3 ),    HEX_DBL( +, 1, 22d78f0fa061a, -, 2 ),
+                                                HEX_DBL( +, 1, 77a45d8117fd5, -, 2 ),    HEX_DBL( +, 1, d1e944f6fbdaa, -, 2 ),
+                                                HEX_DBL( +, 1, 190048ef6002,  -, 1 ),    HEX_DBL( +, 1, 4c2531c3c0d38, -, 1 ),
+                                            };
     f -= reduction[index];
@@ -1324,368 +1291,223 @@
     // find expm1(f)
     // calculate as p(f) = (exp(f)-1)/f
     //              expm1(f) = f * p(f)
-    // p(f) is a minimax polynomial with error within 0x1.1d7693618d001p-48 over
-    // the range +- 0.0625
-    double p = 0.999999999999998001599
-        + (0.499999999999839628284
-           + (0.166666666672817459505
-              + (0.041666666612283048687
-                 + (0.008333330214567431435
-                    + (0.001389005319303770070 + 0.000198833381525156667 * f)
-                        * f)
-                     * f)
-                  * f)
-               * f)
-            * f;
+    // p(f) is a minimax polynomial with error within 0x1.1d7693618d001p-48 over the range +- 0.0625
+    double p = 0.999999999999998001599 +
+               (0.499999999999839628284 +
+               (0.166666666672817459505 +
+               (0.041666666612283048687 +
+               (0.008333330214567431435 +
+               (0.001389005319303770070 + 0.000198833381525156667 * f)*f)*f)*f)*f)*f;
     f *= p; // expm1( reduced f )
     // expm1(f) = (exmp1( reduced_f) + 1.0) * ( exponentials[index] + 1 ) - 1
-    //          =  exmp1( reduced_f) * exponentials[index] + exmp1( reduced_f) +
-    //          exponentials[index] + 1 -1 =  exmp1( reduced_f) *
-    //          exponentials[index] + exmp1( reduced_f) + exponentials[index]
-    f += exponentials[index] + f * exponentials[index];
+    //          =  exmp1( reduced_f) * exponentials[index] + exmp1( reduced_f) + exponentials[index] + 1 -1
+    //          =  exmp1( reduced_f) * exponentials[index] + exmp1( reduced_f) + exponentials[index]
+    f +=  exponentials[index] + f * exponentials[index];
     // scale by e ** i
-    int exponent = (int)i;
-    if (0 == exponent) return f; // precise answer for x near 1
+    int exponent = (int) i;
+    if( 0 == exponent )
+        return f;       // precise answer for x near 1
     // table of e**(i-150)
-    static const double exp_table[128 + 150 + 1] = {
-        HEX_DBL(+, 1, 82e16284f5ec5, -, 217),
-        HEX_DBL(+, 1, 06e9996332ba1, -, 215),
-        HEX_DBL(+, 1, 6555cb289e44b, -, 214),
-        HEX_DBL(+, 1, e5ab364643354, -, 213),
-        HEX_DBL(+, 1, 4a0bd18e64df7, -, 211),
-        HEX_DBL(+, 1, c094499cc578e, -, 210),
-        HEX_DBL(+, 1, 30d759323998c, -, 208),
-        HEX_DBL(+, 1, 9e5278ab1d4cf, -, 207),
-        HEX_DBL(+, 1, 198fa3f30be25, -, 205),
-        HEX_DBL(+, 1, 7eae636d6144e, -, 204),
-        HEX_DBL(+, 1, 040f1036f4863, -, 202),
-        HEX_DBL(+, 1, 6174e477a895f, -, 201),
-        HEX_DBL(+, 1, e065b82dd95a, -, 200),
-        HEX_DBL(+, 1, 4676be491d129, -, 198),
-        HEX_DBL(+, 1, bbb5da5f7c823, -, 197),
-        HEX_DBL(+, 1, 2d884eef5fdcb, -, 195),
-        HEX_DBL(+, 1, 99d3397ab8371, -, 194),
-        HEX_DBL(+, 1, 1681497ed15b3, -, 192),
-        HEX_DBL(+, 1, 7a870f597fdbd, -, 191),
-        HEX_DBL(+, 1, 013c74edba307, -, 189),
-        HEX_DBL(+, 1, 5d9ec4ada7938, -, 188),
-        HEX_DBL(+, 1, db2edfd20fa7c, -, 187),
-        HEX_DBL(+, 1, 42eb9f39afb0b, -, 185),
-        HEX_DBL(+, 1, b6e4f282b43f4, -, 184),
-        HEX_DBL(+, 1, 2a42764857b19, -, 182),
-        HEX_DBL(+, 1, 9560792d19314, -, 181),
-        HEX_DBL(+, 1, 137b6ce8e052c, -, 179),
-        HEX_DBL(+, 1, 766b45dd84f18, -, 178),
-        HEX_DBL(+, 1, fce362fe6e7d, -, 177),
-        HEX_DBL(+, 1, 59d34dd8a5473, -, 175),
-        HEX_DBL(+, 1, d606847fc727a, -, 174),
-        HEX_DBL(+, 1, 3f6a58b795de3, -, 172),
-        HEX_DBL(+, 1, b2216c6efdac1, -, 171),
-        HEX_DBL(+, 1, 2705b5b153fb8, -, 169),
-        HEX_DBL(+, 1, 90fa1509bd50d, -, 168),
-        HEX_DBL(+, 1, 107df698da211, -, 166),
-        HEX_DBL(+, 1, 725ae6e7b9d35, -, 165),
-        HEX_DBL(+, 1, f75d6040aeff6, -, 164),
-        HEX_DBL(+, 1, 56126259e093c, -, 162),
-        HEX_DBL(+, 1, d0ec7df4f7bd4, -, 161),
-        HEX_DBL(+, 1, 3bf2cf6722e46, -, 159),
-        HEX_DBL(+, 1, ad6b22f55db42, -, 158),
-        HEX_DBL(+, 1, 23d1f3e5834a, -, 156),
-        HEX_DBL(+, 1, 8c9feab89b876, -, 155),
-        HEX_DBL(+, 1, 0d88cf37f00dd, -, 153),
-        HEX_DBL(+, 1, 6e55d2bf838a7, -, 152),
-        HEX_DBL(+, 1, f1e6b68529e33, -, 151),
-        HEX_DBL(+, 1, 525be4e4e601d, -, 149),
-        HEX_DBL(+, 1, cbe0a45f75eb1, -, 148),
-        HEX_DBL(+, 1, 3884e838aea68, -, 146),
-        HEX_DBL(+, 1, a8c1f14e2af5d, -, 145),
-        HEX_DBL(+, 1, 20a717e64a9bd, -, 143),
-        HEX_DBL(+, 1, 8851d84118908, -, 142),
-        HEX_DBL(+, 1, 0a9bdfb02d24, -, 140),
-        HEX_DBL(+, 1, 6a5bea046b42e, -, 139),
-        HEX_DBL(+, 1, ec7f3b269efa8, -, 138),
-        HEX_DBL(+, 1, 4eafb87eab0f2, -, 136),
-        HEX_DBL(+, 1, c6e2d05bbc, -, 135),
-        HEX_DBL(+, 1, 35208867c2683, -, 133),
-        HEX_DBL(+, 1, a425b317eeacd, -, 132),
-        HEX_DBL(+, 1, 1d8508fa8246a, -, 130),
-        HEX_DBL(+, 1, 840fbc08fdc8a, -, 129),
-        HEX_DBL(+, 1, 07b7112bc1ffe, -, 127),
-        HEX_DBL(+, 1, 666d0dad2961d, -, 126),
-        HEX_DBL(+, 1, e726c3f64d0fe, -, 125),
-        HEX_DBL(+, 1, 4b0dc07cabf98, -, 123),
-        HEX_DBL(+, 1, c1f2daf3b6a46, -, 122),
-        HEX_DBL(+, 1, 31c5957a47de2, -, 120),
-        HEX_DBL(+, 1, 9f96445648b9f, -, 119),
-        HEX_DBL(+, 1, 1a6baeadb4fd1, -, 117),
-        HEX_DBL(+, 1, 7fd974d372e45, -, 116),
-        HEX_DBL(+, 1, 04da4d1452919, -, 114),
-        HEX_DBL(+, 1, 62891f06b345, -, 113),
-        HEX_DBL(+, 1, e1dd273aa8a4a, -, 112),
-        HEX_DBL(+, 1, 4775e0840bfdd, -, 110),
-        HEX_DBL(+, 1, bd109d9d94bda, -, 109),
-        HEX_DBL(+, 1, 2e73f53fba844, -, 107),
-        HEX_DBL(+, 1, 9b138170d6bfe, -, 106),
-        HEX_DBL(+, 1, 175af0cf60ec5, -, 104),
-        HEX_DBL(+, 1, 7baee1bffa80b, -, 103),
-        HEX_DBL(+, 1, 02057d1245ceb, -, 101),
-        HEX_DBL(+, 1, 5eafffb34ba31, -, 100),
-        HEX_DBL(+, 1, dca23bae16424, -, 99),
-        HEX_DBL(+, 1, 43e7fc88b8056, -, 97),
-        HEX_DBL(+, 1, b83bf23a9a9eb, -, 96),
-        HEX_DBL(+, 1, 2b2b8dd05b318, -, 94),
-        HEX_DBL(+, 1, 969d47321e4cc, -, 93),
-        HEX_DBL(+, 1, 1452b7723aed2, -, 91),
-        HEX_DBL(+, 1, 778fe2497184c, -, 90),
-        HEX_DBL(+, 1, fe7116182e9cc, -, 89),
-        HEX_DBL(+, 1, 5ae191a99585a, -, 87),
-        HEX_DBL(+, 1, d775d87da854d, -, 86),
-        HEX_DBL(+, 1, 4063f8cc8bb98, -, 84),
-        HEX_DBL(+, 1, b374b315f87c1, -, 83),
-        HEX_DBL(+, 1, 27ec458c65e3c, -, 81),
-        HEX_DBL(+, 1, 923372c67a074, -, 80),
-        HEX_DBL(+, 1, 1152eaeb73c08, -, 78),
-        HEX_DBL(+, 1, 737c5645114b5, -, 77),
-        HEX_DBL(+, 1, f8e6c24b5592e, -, 76),
-        HEX_DBL(+, 1, 571db733a9d61, -, 74),
-        HEX_DBL(+, 1, d257d547e083f, -, 73),
-        HEX_DBL(+, 1, 3ce9b9de78f85, -, 71),
-        HEX_DBL(+, 1, aebabae3a41b5, -, 70),
-        HEX_DBL(+, 1, 24b6031b49bda, -, 68),
-        HEX_DBL(+, 1, 8dd5e1bb09d7e, -, 67),
-        HEX_DBL(+, 1, 0e5b73d1ff53d, -, 65),
-        HEX_DBL(+, 1, 6f741de1748ec, -, 64),
-        HEX_DBL(+, 1, f36bd37f42f3e, -, 63),
-        HEX_DBL(+, 1, 536452ee2f75c, -, 61),
-        HEX_DBL(+, 1, cd480a1b7482, -, 60),
-        HEX_DBL(+, 1, 39792499b1a24, -, 58),
-        HEX_DBL(+, 1, aa0de4bf35b38, -, 57),
-        HEX_DBL(+, 1, 2188ad6ae3303, -, 55),
-        HEX_DBL(+, 1, 898471fca6055, -, 54),
-        HEX_DBL(+, 1, 0b6c3afdde064, -, 52),
-        HEX_DBL(+, 1, 6b7719a59f0e, -, 51),
-        HEX_DBL(+, 1, ee001eed62aa, -, 50),
-        HEX_DBL(+, 1, 4fb547c775da8, -, 48),
-        HEX_DBL(+, 1, c8464f7616468, -, 47),
-        HEX_DBL(+, 1, 36121e24d3bba, -, 45),
-        HEX_DBL(+, 1, a56e0c2ac7f75, -, 44),
-        HEX_DBL(+, 1, 1e642baeb84a, -, 42),
-        HEX_DBL(+, 1, 853f01d6d53ba, -, 41),
-        HEX_DBL(+, 1, 0885298767e9a, -, 39),
-        HEX_DBL(+, 1, 67852a7007e42, -, 38),
-        HEX_DBL(+, 1, e8a37a45fc32e, -, 37),
-        HEX_DBL(+, 1, 4c1078fe9228a, -, 35),
-        HEX_DBL(+, 1, c3527e433fab1, -, 34),
-        HEX_DBL(+, 1, 32b48bf117da2, -, 32),
-        HEX_DBL(+, 1, a0db0d0ddb3ec, -, 31),
-        HEX_DBL(+, 1, 1b48655f37267, -, 29),
-        HEX_DBL(+, 1, 81056ff2c5772, -, 28),
-        HEX_DBL(+, 1, 05a628c699fa1, -, 26),
-        HEX_DBL(+, 1, 639e3175a689d, -, 25),
-        HEX_DBL(+, 1, e355bbaee85cb, -, 24),
-        HEX_DBL(+, 1, 4875ca227ec38, -, 22),
-        HEX_DBL(+, 1, be6c6fdb01612, -, 21),
-        HEX_DBL(+, 1, 2f6053b981d98, -, 19),
-        HEX_DBL(+, 1, 9c54c3b43bc8b, -, 18),
-        HEX_DBL(+, 1, 18354238f6764, -, 16),
-        HEX_DBL(+, 1, 7cd79b5647c9b, -, 15),
-        HEX_DBL(+, 1, 02cf22526545a, -, 13),
-        HEX_DBL(+, 1, 5fc21041027ad, -, 12),
-        HEX_DBL(+, 1, de16b9c24a98f, -, 11),
-        HEX_DBL(+, 1, 44e51f113d4d6, -, 9),
-        HEX_DBL(+, 1, b993fe00d5376, -, 8),
-        HEX_DBL(+, 1, 2c155b8213cf4, -, 6),
-        HEX_DBL(+, 1, 97db0ccceb0af, -, 5),
-        HEX_DBL(+, 1, 152aaa3bf81cc, -, 3),
-        HEX_DBL(+, 1, 78b56362cef38, -, 2),
-        HEX_DBL(+, 1, 0, +, 0),
-        HEX_DBL(+, 1, 5bf0a8b145769, +, 1),
-        HEX_DBL(+, 1, d8e64b8d4ddae, +, 2),
-        HEX_DBL(+, 1, 415e5bf6fb106, +, 4),
-        HEX_DBL(+, 1, b4c902e273a58, +, 5),
-        HEX_DBL(+, 1, 28d389970338f, +, 7),
-        HEX_DBL(+, 1, 936dc5690c08f, +, 8),
-        HEX_DBL(+, 1, 122885aaeddaa, +, 10),
-        HEX_DBL(+, 1, 749ea7d470c6e, +, 11),
-        HEX_DBL(+, 1, fa7157c470f82, +, 12),
-        HEX_DBL(+, 1, 5829dcf95056, +, 14),
-        HEX_DBL(+, 1, d3c4488ee4f7f, +, 15),
-        HEX_DBL(+, 1, 3de1654d37c9a, +, 17),
-        HEX_DBL(+, 1, b00b5916ac955, +, 18),
-        HEX_DBL(+, 1, 259ac48bf05d7, +, 20),
-        HEX_DBL(+, 1, 8f0ccafad2a87, +, 21),
-        HEX_DBL(+, 1, 0f2ebd0a8002, +, 23),
-        HEX_DBL(+, 1, 709348c0ea4f9, +, 24),
-        HEX_DBL(+, 1, f4f22091940bd, +, 25),
-        HEX_DBL(+, 1, 546d8f9ed26e1, +, 27),
-        HEX_DBL(+, 1, ceb088b68e804, +, 28),
-        HEX_DBL(+, 1, 3a6e1fd9eecfd, +, 30),
-        HEX_DBL(+, 1, ab5adb9c436, +, 31),
-        HEX_DBL(+, 1, 226af33b1fdc1, +, 33),
-        HEX_DBL(+, 1, 8ab7fb5475fb7, +, 34),
-        HEX_DBL(+, 1, 0c3d3920962c9, +, 36),
-        HEX_DBL(+, 1, 6c932696a6b5d, +, 37),
-        HEX_DBL(+, 1, ef822f7f6731d, +, 38),
-        HEX_DBL(+, 1, 50bba3796379a, +, 40),
-        HEX_DBL(+, 1, c9aae4631c056, +, 41),
-        HEX_DBL(+, 1, 370470aec28ed, +, 43),
-        HEX_DBL(+, 1, a6b765d8cdf6d, +, 44),
-        HEX_DBL(+, 1, 1f43fcc4b662c, +, 46),
-        HEX_DBL(+, 1, 866f34a725782, +, 47),
-        HEX_DBL(+, 1, 0953e2f3a1ef7, +, 49),
-        HEX_DBL(+, 1, 689e221bc8d5b, +, 50),
-        HEX_DBL(+, 1, ea215a1d20d76, +, 51),
-        HEX_DBL(+, 1, 4d13fbb1a001a, +, 53),
-        HEX_DBL(+, 1, c4b334617cc67, +, 54),
-        HEX_DBL(+, 1, 33a43d282a519, +, 56),
-        HEX_DBL(+, 1, a220d397972eb, +, 57),
-        HEX_DBL(+, 1, 1c25c88df6862, +, 59),
-        HEX_DBL(+, 1, 8232558201159, +, 60),
-        HEX_DBL(+, 1, 0672a3c9eb871, +, 62),
-        HEX_DBL(+, 1, 64b41c6d37832, +, 63),
-        HEX_DBL(+, 1, e4cf766fe49be, +, 64),
-        HEX_DBL(+, 1, 49767bc0483e3, +, 66),
-        HEX_DBL(+, 1, bfc951eb8bb76, +, 67),
-        HEX_DBL(+, 1, 304d6aeca254b, +, 69),
-        HEX_DBL(+, 1, 9d97010884251, +, 70),
-        HEX_DBL(+, 1, 19103e4080b45, +, 72),
-        HEX_DBL(+, 1, 7e013cd114461, +, 73),
-        HEX_DBL(+, 1, 03996528e074c, +, 75),
-        HEX_DBL(+, 1, 60d4f6fdac731, +, 76),
-        HEX_DBL(+, 1, df8c5af17ba3b, +, 77),
-        HEX_DBL(+, 1, 45e3076d61699, +, 79),
-        HEX_DBL(+, 1, baed16a6e0da7, +, 80),
-        HEX_DBL(+, 1, 2cffdfebde1a1, +, 82),
-        HEX_DBL(+, 1, 9919cabefcb69, +, 83),
-        HEX_DBL(+, 1, 160345c9953e3, +, 85),
-        HEX_DBL(+, 1, 79dbc9dc53c66, +, 86),
-        HEX_DBL(+, 1, 00c810d464097, +, 88),
-        HEX_DBL(+, 1, 5d009394c5c27, +, 89),
-        HEX_DBL(+, 1, da57de8f107a8, +, 90),
-        HEX_DBL(+, 1, 425982cf597cd, +, 92),
-        HEX_DBL(+, 1, b61e5ca3a5e31, +, 93),
-        HEX_DBL(+, 1, 29bb825dfcf87, +, 95),
-        HEX_DBL(+, 1, 94a90db0d6fe2, +, 96),
-        HEX_DBL(+, 1, 12fec759586fd, +, 98),
-        HEX_DBL(+, 1, 75c1dc469e3af, +, 99),
-        HEX_DBL(+, 1, fbfd219c43b04, +, 100),
-        HEX_DBL(+, 1, 5936d44e1a146, +, 102),
-        HEX_DBL(+, 1, d531d8a7ee79c, +, 103),
-        HEX_DBL(+, 1, 3ed9d24a2d51b, +, 105),
-        HEX_DBL(+, 1, b15cfe5b6e17b, +, 106),
-        HEX_DBL(+, 1, 268038c2c0e, +, 108),
-        HEX_DBL(+, 1, 9044a73545d48, +, 109),
-        HEX_DBL(+, 1, 1002ab6218b38, +, 111),
-        HEX_DBL(+, 1, 71b3540cbf921, +, 112),
-        HEX_DBL(+, 1, f6799ea9c414a, +, 113),
-        HEX_DBL(+, 1, 55779b984f3eb, +, 115),
-        HEX_DBL(+, 1, d01a210c44aa4, +, 116),
-        HEX_DBL(+, 1, 3b63da8e9121, +, 118),
-        HEX_DBL(+, 1, aca8d6b0116b8, +, 119),
-        HEX_DBL(+, 1, 234de9e0c74e9, +, 121),
-        HEX_DBL(+, 1, 8bec7503ca477, +, 122),
-        HEX_DBL(+, 1, 0d0eda9796b9, +, 124),
-        HEX_DBL(+, 1, 6db0118477245, +, 125),
-        HEX_DBL(+, 1, f1056dc7bf22d, +, 126),
-        HEX_DBL(+, 1, 51c2cc3433801, +, 128),
-        HEX_DBL(+, 1, cb108ffbec164, +, 129),
-        HEX_DBL(+, 1, 37f780991b584, +, 131),
-        HEX_DBL(+, 1, a801c0ea8ac4d, +, 132),
-        HEX_DBL(+, 1, 20247cc4c46c1, +, 134),
-        HEX_DBL(+, 1, 87a0553328015, +, 135),
-        HEX_DBL(+, 1, 0a233dee4f9bb, +, 137),
-        HEX_DBL(+, 1, 69b7f55b808ba, +, 138),
-        HEX_DBL(+, 1, eba064644060a, +, 139),
-        HEX_DBL(+, 1, 4e184933d9364, +, 141),
-        HEX_DBL(+, 1, c614fe2531841, +, 142),
-        HEX_DBL(+, 1, 3494a9b171bf5, +, 144),
-        HEX_DBL(+, 1, a36798b9d969b, +, 145),
-        HEX_DBL(+, 1, 1d03d8c0c04af, +, 147),
-        HEX_DBL(+, 1, 836026385c974, +, 148),
-        HEX_DBL(+, 1, 073fbe9ac901d, +, 150),
-        HEX_DBL(+, 1, 65cae0969f286, +, 151),
-        HEX_DBL(+, 1, e64a58639cae8, +, 152),
-        HEX_DBL(+, 1, 4a77f5f9b50f9, +, 154),
-        HEX_DBL(+, 1, c12744a3a28e3, +, 155),
-        HEX_DBL(+, 1, 313b3b6978e85, +, 157),
-        HEX_DBL(+, 1, 9eda3a31e587e, +, 158),
-        HEX_DBL(+, 1, 19ebe56b56453, +, 160),
-        HEX_DBL(+, 1, 7f2bc6e599b7e, +, 161),
-        HEX_DBL(+, 1, 04644610df2ff, +, 163),
-        HEX_DBL(+, 1, 61e8b490ac4e6, +, 164),
-        HEX_DBL(+, 1, e103201f299b3, +, 165),
-        HEX_DBL(+, 1, 46e1b637beaf5, +, 167),
-        HEX_DBL(+, 1, bc473cfede104, +, 168),
-        HEX_DBL(+, 1, 2deb1b9c85e2d, +, 170),
-        HEX_DBL(+, 1, 9a5981ca67d1, +, 171),
-        HEX_DBL(+, 1, 16dc8a9ef670b, +, 173),
-        HEX_DBL(+, 1, 7b03166942309, +, 174),
-        HEX_DBL(+, 1, 0190be03150a7, +, 176),
-        HEX_DBL(+, 1, 5e1152f9a8119, +, 177),
-        HEX_DBL(+, 1, dbca9263f8487, +, 178),
-        HEX_DBL(+, 1, 43556dee93bee, +, 180),
-        HEX_DBL(+, 1, b774c12967dfa, +, 181),
-        HEX_DBL(+, 1, 2aa4306e922c2, +, 183),
-        HEX_DBL(+, 1, 95e54c5dd4217, +, 184)
-    };
+    static const double exp_table[128+150+1] =
+    {
+        HEX_DBL( +, 1, 82e16284f5ec5, -, 217 ),    HEX_DBL( +, 1, 06e9996332ba1, -, 215 ),
+        HEX_DBL( +, 1, 6555cb289e44b, -, 214 ),    HEX_DBL( +, 1, e5ab364643354, -, 213 ),
+        HEX_DBL( +, 1, 4a0bd18e64df7, -, 211 ),    HEX_DBL( +, 1, c094499cc578e, -, 210 ),
+        HEX_DBL( +, 1, 30d759323998c, -, 208 ),    HEX_DBL( +, 1, 9e5278ab1d4cf, -, 207 ),
+        HEX_DBL( +, 1, 198fa3f30be25, -, 205 ),    HEX_DBL( +, 1, 7eae636d6144e, -, 204 ),
+        HEX_DBL( +, 1, 040f1036f4863, -, 202 ),    HEX_DBL( +, 1, 6174e477a895f, -, 201 ),
+        HEX_DBL( +, 1, e065b82dd95a,  -, 200 ),    HEX_DBL( +, 1, 4676be491d129, -, 198 ),
+        HEX_DBL( +, 1, bbb5da5f7c823, -, 197 ),    HEX_DBL( +, 1, 2d884eef5fdcb, -, 195 ),
+        HEX_DBL( +, 1, 99d3397ab8371, -, 194 ),    HEX_DBL( +, 1, 1681497ed15b3, -, 192 ),
+        HEX_DBL( +, 1, 7a870f597fdbd, -, 191 ),    HEX_DBL( +, 1, 013c74edba307, -, 189 ),
+        HEX_DBL( +, 1, 5d9ec4ada7938, -, 188 ),    HEX_DBL( +, 1, db2edfd20fa7c, -, 187 ),
+        HEX_DBL( +, 1, 42eb9f39afb0b, -, 185 ),    HEX_DBL( +, 1, b6e4f282b43f4, -, 184 ),
+        HEX_DBL( +, 1, 2a42764857b19, -, 182 ),    HEX_DBL( +, 1, 9560792d19314, -, 181 ),
+        HEX_DBL( +, 1, 137b6ce8e052c, -, 179 ),    HEX_DBL( +, 1, 766b45dd84f18, -, 178 ),
+        HEX_DBL( +, 1, fce362fe6e7d,  -, 177 ),    HEX_DBL( +, 1, 59d34dd8a5473, -, 175 ),
+        HEX_DBL( +, 1, d606847fc727a, -, 174 ),    HEX_DBL( +, 1, 3f6a58b795de3, -, 172 ),
+        HEX_DBL( +, 1, b2216c6efdac1, -, 171 ),    HEX_DBL( +, 1, 2705b5b153fb8, -, 169 ),
+        HEX_DBL( +, 1, 90fa1509bd50d, -, 168 ),    HEX_DBL( +, 1, 107df698da211, -, 166 ),
+        HEX_DBL( +, 1, 725ae6e7b9d35, -, 165 ),    HEX_DBL( +, 1, f75d6040aeff6, -, 164 ),
+        HEX_DBL( +, 1, 56126259e093c, -, 162 ),    HEX_DBL( +, 1, d0ec7df4f7bd4, -, 161 ),
+        HEX_DBL( +, 1, 3bf2cf6722e46, -, 159 ),    HEX_DBL( +, 1, ad6b22f55db42, -, 158 ),
+        HEX_DBL( +, 1, 23d1f3e5834a,  -, 156 ),    HEX_DBL( +, 1, 8c9feab89b876, -, 155 ),
+        HEX_DBL( +, 1, 0d88cf37f00dd, -, 153 ),    HEX_DBL( +, 1, 6e55d2bf838a7, -, 152 ),
+        HEX_DBL( +, 1, f1e6b68529e33, -, 151 ),    HEX_DBL( +, 1, 525be4e4e601d, -, 149 ),
+        HEX_DBL( +, 1, cbe0a45f75eb1, -, 148 ),    HEX_DBL( +, 1, 3884e838aea68, -, 146 ),
+        HEX_DBL( +, 1, a8c1f14e2af5d, -, 145 ),    HEX_DBL( +, 1, 20a717e64a9bd, -, 143 ),
+        HEX_DBL( +, 1, 8851d84118908, -, 142 ),    HEX_DBL( +, 1, 0a9bdfb02d24,  -, 140 ),
+        HEX_DBL( +, 1, 6a5bea046b42e, -, 139 ),    HEX_DBL( +, 1, ec7f3b269efa8, -, 138 ),
+        HEX_DBL( +, 1, 4eafb87eab0f2, -, 136 ),    HEX_DBL( +, 1, c6e2d05bbc,    -, 135 ),
+        HEX_DBL( +, 1, 35208867c2683, -, 133 ),    HEX_DBL( +, 1, a425b317eeacd, -, 132 ),
+        HEX_DBL( +, 1, 1d8508fa8246a, -, 130 ),    HEX_DBL( +, 1, 840fbc08fdc8a, -, 129 ),
+        HEX_DBL( +, 1, 07b7112bc1ffe, -, 127 ),    HEX_DBL( +, 1, 666d0dad2961d, -, 126 ),
+        HEX_DBL( +, 1, e726c3f64d0fe, -, 125 ),    HEX_DBL( +, 1, 4b0dc07cabf98, -, 123 ),
+        HEX_DBL( +, 1, c1f2daf3b6a46, -, 122 ),    HEX_DBL( +, 1, 31c5957a47de2, -, 120 ),
+        HEX_DBL( +, 1, 9f96445648b9f, -, 119 ),    HEX_DBL( +, 1, 1a6baeadb4fd1, -, 117 ),
+        HEX_DBL( +, 1, 7fd974d372e45, -, 116 ),    HEX_DBL( +, 1, 04da4d1452919, -, 114 ),
+        HEX_DBL( +, 1, 62891f06b345,  -, 113 ),    HEX_DBL( +, 1, e1dd273aa8a4a, -, 112 ),
+        HEX_DBL( +, 1, 4775e0840bfdd, -, 110 ),    HEX_DBL( +, 1, bd109d9d94bda, -, 109 ),
+        HEX_DBL( +, 1, 2e73f53fba844, -, 107 ),    HEX_DBL( +, 1, 9b138170d6bfe, -, 106 ),
+        HEX_DBL( +, 1, 175af0cf60ec5, -, 104 ),    HEX_DBL( +, 1, 7baee1bffa80b, -, 103 ),
+        HEX_DBL( +, 1, 02057d1245ceb, -, 101 ),    HEX_DBL( +, 1, 5eafffb34ba31, -, 100 ),
+        HEX_DBL( +, 1, dca23bae16424, -, 99 ),    HEX_DBL( +, 1, 43e7fc88b8056, -, 97 ),
+        HEX_DBL( +, 1, b83bf23a9a9eb, -, 96 ),    HEX_DBL( +, 1, 2b2b8dd05b318, -, 94 ),
+        HEX_DBL( +, 1, 969d47321e4cc, -, 93 ),    HEX_DBL( +, 1, 1452b7723aed2, -, 91 ),
+        HEX_DBL( +, 1, 778fe2497184c, -, 90 ),    HEX_DBL( +, 1, fe7116182e9cc, -, 89 ),
+        HEX_DBL( +, 1, 5ae191a99585a, -, 87 ),    HEX_DBL( +, 1, d775d87da854d, -, 86 ),
+        HEX_DBL( +, 1, 4063f8cc8bb98, -, 84 ),    HEX_DBL( +, 1, b374b315f87c1, -, 83 ),
+        HEX_DBL( +, 1, 27ec458c65e3c, -, 81 ),    HEX_DBL( +, 1, 923372c67a074, -, 80 ),
+        HEX_DBL( +, 1, 1152eaeb73c08, -, 78 ),    HEX_DBL( +, 1, 737c5645114b5, -, 77 ),
+        HEX_DBL( +, 1, f8e6c24b5592e, -, 76 ),    HEX_DBL( +, 1, 571db733a9d61, -, 74 ),
+        HEX_DBL( +, 1, d257d547e083f, -, 73 ),    HEX_DBL( +, 1, 3ce9b9de78f85, -, 71 ),
+        HEX_DBL( +, 1, aebabae3a41b5, -, 70 ),    HEX_DBL( +, 1, 24b6031b49bda, -, 68 ),
+        HEX_DBL( +, 1, 8dd5e1bb09d7e, -, 67 ),    HEX_DBL( +, 1, 0e5b73d1ff53d, -, 65 ),
+        HEX_DBL( +, 1, 6f741de1748ec, -, 64 ),    HEX_DBL( +, 1, f36bd37f42f3e, -, 63 ),
+        HEX_DBL( +, 1, 536452ee2f75c, -, 61 ),    HEX_DBL( +, 1, cd480a1b7482,  -, 60 ),
+        HEX_DBL( +, 1, 39792499b1a24, -, 58 ),    HEX_DBL( +, 1, aa0de4bf35b38, -, 57 ),
+        HEX_DBL( +, 1, 2188ad6ae3303, -, 55 ),    HEX_DBL( +, 1, 898471fca6055, -, 54 ),
+        HEX_DBL( +, 1, 0b6c3afdde064, -, 52 ),    HEX_DBL( +, 1, 6b7719a59f0e,  -, 51 ),
+        HEX_DBL( +, 1, ee001eed62aa, -, 50 ),    HEX_DBL( +, 1, 4fb547c775da8, -, 48 ),
+        HEX_DBL( +, 1, c8464f7616468, -, 47 ),    HEX_DBL( +, 1, 36121e24d3bba, -, 45 ),
+        HEX_DBL( +, 1, a56e0c2ac7f75, -, 44 ),    HEX_DBL( +, 1, 1e642baeb84a,  -, 42 ),
+        HEX_DBL( +, 1, 853f01d6d53ba, -, 41 ),    HEX_DBL( +, 1, 0885298767e9a, -, 39 ),
+        HEX_DBL( +, 1, 67852a7007e42, -, 38 ),    HEX_DBL( +, 1, e8a37a45fc32e, -, 37 ),
+        HEX_DBL( +, 1, 4c1078fe9228a, -, 35 ),    HEX_DBL( +, 1, c3527e433fab1, -, 34 ),
+        HEX_DBL( +, 1, 32b48bf117da2, -, 32 ),    HEX_DBL( +, 1, a0db0d0ddb3ec, -, 31 ),
+        HEX_DBL( +, 1, 1b48655f37267, -, 29 ),    HEX_DBL( +, 1, 81056ff2c5772, -, 28 ),
+        HEX_DBL( +, 1, 05a628c699fa1, -, 26 ),    HEX_DBL( +, 1, 639e3175a689d, -, 25 ),
+        HEX_DBL( +, 1, e355bbaee85cb, -, 24 ),    HEX_DBL( +, 1, 4875ca227ec38, -, 22 ),
+        HEX_DBL( +, 1, be6c6fdb01612, -, 21 ),    HEX_DBL( +, 1, 2f6053b981d98, -, 19 ),
+        HEX_DBL( +, 1, 9c54c3b43bc8b, -, 18 ),    HEX_DBL( +, 1, 18354238f6764, -, 16 ),
+        HEX_DBL( +, 1, 7cd79b5647c9b, -, 15 ),    HEX_DBL( +, 1, 02cf22526545a, -, 13 ),
+        HEX_DBL( +, 1, 5fc21041027ad, -, 12 ),    HEX_DBL( +, 1, de16b9c24a98f, -, 11 ),
+        HEX_DBL( +, 1, 44e51f113d4d6, -, 9 ),    HEX_DBL( +, 1, b993fe00d5376, -, 8 ),
+        HEX_DBL( +, 1, 2c155b8213cf4, -, 6 ),    HEX_DBL( +, 1, 97db0ccceb0af, -, 5 ),
+        HEX_DBL( +, 1, 152aaa3bf81cc, -, 3 ),    HEX_DBL( +, 1, 78b56362cef38, -, 2 ),
+        HEX_DBL( +, 1, 0, +, 0 ),                HEX_DBL( +, 1, 5bf0a8b145769, +, 1 ),
+        HEX_DBL( +, 1, d8e64b8d4ddae, +, 2 ),    HEX_DBL( +, 1, 415e5bf6fb106, +, 4 ),
+        HEX_DBL( +, 1, b4c902e273a58, +, 5 ),    HEX_DBL( +, 1, 28d389970338f, +, 7 ),
+        HEX_DBL( +, 1, 936dc5690c08f, +, 8 ),    HEX_DBL( +, 1, 122885aaeddaa, +, 10 ),
+        HEX_DBL( +, 1, 749ea7d470c6e, +, 11 ),    HEX_DBL( +, 1, fa7157c470f82, +, 12 ),
+        HEX_DBL( +, 1, 5829dcf95056,  +, 14 ),    HEX_DBL( +, 1, d3c4488ee4f7f, +, 15 ),
+        HEX_DBL( +, 1, 3de1654d37c9a, +, 17 ),    HEX_DBL( +, 1, b00b5916ac955, +, 18 ),
+        HEX_DBL( +, 1, 259ac48bf05d7, +, 20 ),    HEX_DBL( +, 1, 8f0ccafad2a87, +, 21 ),
+        HEX_DBL( +, 1, 0f2ebd0a8002,  +, 23 ),    HEX_DBL( +, 1, 709348c0ea4f9, +, 24 ),
+        HEX_DBL( +, 1, f4f22091940bd, +, 25 ),    HEX_DBL( +, 1, 546d8f9ed26e1, +, 27 ),
+        HEX_DBL( +, 1, ceb088b68e804, +, 28 ),    HEX_DBL( +, 1, 3a6e1fd9eecfd, +, 30 ),
+        HEX_DBL( +, 1, ab5adb9c436,   +, 31 ),    HEX_DBL( +, 1, 226af33b1fdc1, +, 33 ),
+        HEX_DBL( +, 1, 8ab7fb5475fb7, +, 34 ),    HEX_DBL( +, 1, 0c3d3920962c9, +, 36 ),
+        HEX_DBL( +, 1, 6c932696a6b5d, +, 37 ),    HEX_DBL( +, 1, ef822f7f6731d, +, 38 ),
+        HEX_DBL( +, 1, 50bba3796379a, +, 40 ),    HEX_DBL( +, 1, c9aae4631c056, +, 41 ),
+        HEX_DBL( +, 1, 370470aec28ed, +, 43 ),    HEX_DBL( +, 1, a6b765d8cdf6d, +, 44 ),
+        HEX_DBL( +, 1, 1f43fcc4b662c, +, 46 ),    HEX_DBL( +, 1, 866f34a725782, +, 47 ),
+        HEX_DBL( +, 1, 0953e2f3a1ef7, +, 49 ),    HEX_DBL( +, 1, 689e221bc8d5b, +, 50 ),
+        HEX_DBL( +, 1, ea215a1d20d76, +, 51 ),    HEX_DBL( +, 1, 4d13fbb1a001a, +, 53 ),
+        HEX_DBL( +, 1, c4b334617cc67, +, 54 ),    HEX_DBL( +, 1, 33a43d282a519, +, 56 ),
+        HEX_DBL( +, 1, a220d397972eb, +, 57 ),    HEX_DBL( +, 1, 1c25c88df6862, +, 59 ),
+        HEX_DBL( +, 1, 8232558201159, +, 60 ),    HEX_DBL( +, 1, 0672a3c9eb871, +, 62 ),
+        HEX_DBL( +, 1, 64b41c6d37832, +, 63 ),    HEX_DBL( +, 1, e4cf766fe49be, +, 64 ),
+        HEX_DBL( +, 1, 49767bc0483e3, +, 66 ),    HEX_DBL( +, 1, bfc951eb8bb76, +, 67 ),
+        HEX_DBL( +, 1, 304d6aeca254b, +, 69 ),    HEX_DBL( +, 1, 9d97010884251, +, 70 ),
+        HEX_DBL( +, 1, 19103e4080b45, +, 72 ),    HEX_DBL( +, 1, 7e013cd114461, +, 73 ),
+        HEX_DBL( +, 1, 03996528e074c, +, 75 ),    HEX_DBL( +, 1, 60d4f6fdac731, +, 76 ),
+        HEX_DBL( +, 1, df8c5af17ba3b, +, 77 ),    HEX_DBL( +, 1, 45e3076d61699, +, 79 ),
+        HEX_DBL( +, 1, baed16a6e0da7, +, 80 ),    HEX_DBL( +, 1, 2cffdfebde1a1, +, 82 ),
+        HEX_DBL( +, 1, 9919cabefcb69, +, 83 ),    HEX_DBL( +, 1, 160345c9953e3, +, 85 ),
+        HEX_DBL( +, 1, 79dbc9dc53c66, +, 86 ),    HEX_DBL( +, 1, 00c810d464097, +, 88 ),
+        HEX_DBL( +, 1, 5d009394c5c27, +, 89 ),    HEX_DBL( +, 1, da57de8f107a8, +, 90 ),
+        HEX_DBL( +, 1, 425982cf597cd, +, 92 ),    HEX_DBL( +, 1, b61e5ca3a5e31, +, 93 ),
+        HEX_DBL( +, 1, 29bb825dfcf87, +, 95 ),    HEX_DBL( +, 1, 94a90db0d6fe2, +, 96 ),
+        HEX_DBL( +, 1, 12fec759586fd, +, 98 ),    HEX_DBL( +, 1, 75c1dc469e3af, +, 99 ),
+        HEX_DBL( +, 1, fbfd219c43b04, +, 100 ),    HEX_DBL( +, 1, 5936d44e1a146, +, 102 ),
+        HEX_DBL( +, 1, d531d8a7ee79c, +, 103 ),    HEX_DBL( +, 1, 3ed9d24a2d51b, +, 105 ),
+        HEX_DBL( +, 1, b15cfe5b6e17b, +, 106 ),    HEX_DBL( +, 1, 268038c2c0e,   +, 108 ),
+        HEX_DBL( +, 1, 9044a73545d48, +, 109 ),    HEX_DBL( +, 1, 1002ab6218b38, +, 111 ),
+        HEX_DBL( +, 1, 71b3540cbf921, +, 112 ),    HEX_DBL( +, 1, f6799ea9c414a, +, 113 ),
+        HEX_DBL( +, 1, 55779b984f3eb, +, 115 ),    HEX_DBL( +, 1, d01a210c44aa4, +, 116 ),
+        HEX_DBL( +, 1, 3b63da8e9121,  +, 118 ),    HEX_DBL( +, 1, aca8d6b0116b8, +, 119 ),
+        HEX_DBL( +, 1, 234de9e0c74e9, +, 121 ),    HEX_DBL( +, 1, 8bec7503ca477, +, 122 ),
+        HEX_DBL( +, 1, 0d0eda9796b9,  +, 124 ),    HEX_DBL( +, 1, 6db0118477245, +, 125 ),
+        HEX_DBL( +, 1, f1056dc7bf22d, +, 126 ),    HEX_DBL( +, 1, 51c2cc3433801, +, 128 ),
+        HEX_DBL( +, 1, cb108ffbec164, +, 129 ),    HEX_DBL( +, 1, 37f780991b584, +, 131 ),
+        HEX_DBL( +, 1, a801c0ea8ac4d, +, 132 ),    HEX_DBL( +, 1, 20247cc4c46c1, +, 134 ),
+        HEX_DBL( +, 1, 87a0553328015, +, 135 ),    HEX_DBL( +, 1, 0a233dee4f9bb, +, 137 ),
+        HEX_DBL( +, 1, 69b7f55b808ba, +, 138 ),    HEX_DBL( +, 1, eba064644060a, +, 139 ),
+        HEX_DBL( +, 1, 4e184933d9364, +, 141 ),    HEX_DBL( +, 1, c614fe2531841, +, 142 ),
+        HEX_DBL( +, 1, 3494a9b171bf5, +, 144 ),    HEX_DBL( +, 1, a36798b9d969b, +, 145 ),
+        HEX_DBL( +, 1, 1d03d8c0c04af, +, 147 ),    HEX_DBL( +, 1, 836026385c974, +, 148 ),
+        HEX_DBL( +, 1, 073fbe9ac901d, +, 150 ),    HEX_DBL( +, 1, 65cae0969f286, +, 151 ),
+        HEX_DBL( +, 1, e64a58639cae8, +, 152 ),    HEX_DBL( +, 1, 4a77f5f9b50f9, +, 154 ),
+        HEX_DBL( +, 1, c12744a3a28e3, +, 155 ),    HEX_DBL( +, 1, 313b3b6978e85, +, 157 ),
+        HEX_DBL( +, 1, 9eda3a31e587e, +, 158 ),    HEX_DBL( +, 1, 19ebe56b56453, +, 160 ),
+        HEX_DBL( +, 1, 7f2bc6e599b7e, +, 161 ),    HEX_DBL( +, 1, 04644610df2ff, +, 163 ),
+        HEX_DBL( +, 1, 61e8b490ac4e6, +, 164 ),    HEX_DBL( +, 1, e103201f299b3, +, 165 ),
+        HEX_DBL( +, 1, 46e1b637beaf5, +, 167 ),    HEX_DBL( +, 1, bc473cfede104, +, 168 ),
+        HEX_DBL( +, 1, 2deb1b9c85e2d, +, 170 ),    HEX_DBL( +, 1, 9a5981ca67d1,  +, 171 ),
+        HEX_DBL( +, 1, 16dc8a9ef670b, +, 173 ),    HEX_DBL( +, 1, 7b03166942309, +, 174 ),
+        HEX_DBL( +, 1, 0190be03150a7, +, 176 ),    HEX_DBL( +, 1, 5e1152f9a8119, +, 177 ),
+        HEX_DBL( +, 1, dbca9263f8487, +, 178 ),    HEX_DBL( +, 1, 43556dee93bee, +, 180 ),
+        HEX_DBL( +, 1, b774c12967dfa, +, 181 ),    HEX_DBL( +, 1, 2aa4306e922c2, +, 183 ),
+        HEX_DBL( +, 1, 95e54c5dd4217, +, 184 )    };
-    // scale by e**i --  (expm1(f) + 1)*e**i - 1  = expm1(f) * e**i + e**i - 1 =
-    // e**i
-    return exp_table[exponent + 150] + (f * exp_table[exponent + 150] - 1.0);
+    // scale by e**i --  (expm1(f) + 1)*e**i - 1  = expm1(f) * e**i + e**i - 1 = e**i + (expm1(f) * e**i - 1)
+    return exp_table[exponent+150] + (f * exp_table[exponent+150] - 1.0);
-double reference_fmax(double x, double y)
+double reference_fmax( double x, double y )
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
     return x >= y ? x : y;
-double reference_fmin(double x, double y)
+double reference_fmin( double x, double y )
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
     return x <= y ? x : y;
-double reference_hypot(double x, double y)
+double reference_hypot( double x, double y )
-    // Since the inputs are actually floats, we don't have to worry about range
-    // here
-    if (isinf(x) || isinf(y)) return INFINITY;
+    // Since the inputs are actually floats, we don't have to worry about range here
+    if( isinf(x) || isinf(y) )
+        return INFINITY;
-    return sqrt(x * x + y * y);
+    return sqrt( x * x + y * y );
-int reference_ilogbl(long double x)
+int    reference_ilogbl( long double x)
     extern int gDeviceILogb0, gDeviceILogbNaN;
     // Since we are just using this to verify double precision, we can
     // use the double precision ilogb here
-    union {
-        double f;
-        cl_ulong u;
-    } u;
-    u.f = (double)x;
+    union { double f; cl_ulong u;} u;
+    u.f = (double) x;
     int exponent = (int)(u.u >> 52) & 0x7ff;
-    if (exponent == 0x7ff)
+    if( exponent == 0x7ff )
-        if (u.u & 0x000fffffffffffffULL) return gDeviceILogbNaN;
+        if( u.u & 0x000fffffffffffffULL )
+            return gDeviceILogbNaN;
         return CL_INT_MAX;
-    if (exponent == 0)
-    { // deal with denormals
-        u.f = x * HEX_DBL(+, 1, 0, +, 64);
+    if( exponent == 0 )
+    {   // deal with denormals
+        u.f =  x * HEX_DBL( +, 1, 0, +, 64 );
         exponent = (cl_uint)(u.u >> 52) & 0x7ff;
-        if (exponent == 0) return gDeviceILogb0;
+        if( exponent == 0 )
+            return gDeviceILogb0;
         exponent -= 1023 + 64;
         return exponent;
@@ -1694,105 +1516,84 @@
     return exponent - 1023;
-double reference_relaxed_log2(double x) { return reference_log2(x); }
+//double reference_log2( double x )
+//    return log( x ) * 1.44269504088896340735992468100189214;
-double reference_log2(double x)
+double reference_relaxed_log2( double x )
-    if (isnan(x) || x < 0.0 || x == -INFINITY) return cl_make_nan();
+  return reference_log2(x);
-    if (x == 0.0f) return -INFINITY;
+double reference_log2( double x )
+    if( isnan(x) || x < 0.0 || x == -INFINITY)
+        return cl_make_nan();
-    if (x == INFINITY) return INFINITY;
+    if( x == 0.0f)
+        return -INFINITY;
+    if( x == INFINITY )
+        return INFINITY;
     double hi, lo;
-    __log2_ep(&hi, &lo, x);
+    __log2_ep( &hi, &lo, x );
     return hi;
-double reference_log1p(double x)
-{ // This function is suitable only for verifying log1pf(). It produces several
-  // double precision ulps of error.
+double reference_log1p( double x )
+{   // This function is suitable only for verifying log1pf(). It produces several double precision ulps of error.
     // Handle small and NaN
-    if (!(reference_fabs(x) > HEX_DBL(+, 1, 0, -, 53))) return x;
+    if( ! ( reference_fabs(x) > HEX_DBL( +, 1, 0, -, 53 ) ) )
+        return x;
     // deal with special values
-    if (x <= -1.0)
+    if( x <= -1.0 )
-        if (x < -1.0) return cl_make_nan();
+        if( x < -1.0 )
+            return cl_make_nan();
         return -INFINITY;
     // infinity
-    if (x == INFINITY) return INFINITY;
+    if( x == INFINITY )
+        return INFINITY;
-    // High precision result for when near 0, to avoid problems with the
-    // reference result falling in the wrong binade.
-    if (reference_fabs(x) < HEX_DBL(+, 1, 0, -, 28)) return (1.0 - 0.5 * x) * x;
+    // High precision result for when near 0, to avoid problems with the reference result falling in the wrong binade.
+    if( reference_fabs(x) < HEX_DBL( +, 1, 0, -, 28 ) )
+        return (1.0 - 0.5 * x) * x;
     // Our polynomial is only good in the region +-2**-4.
     // If we aren't in that range then we need to reduce to be in that range
-    double correctionLo =
-        -0.0; // correction down stream to compensate for the reduction, if any
-    double correctionHi =
-        -0.0; // correction down stream to compensate for the exponent, if any
-    if (reference_fabs(x) > HEX_DBL(+, 1, 0, -, 4))
+    double correctionLo = -0.0;           // correction down stream to compensate for the reduction, if any
+    double correctionHi = -0.0;           // correction down stream to compensate for the exponent, if any
+    if( reference_fabs(x) > HEX_DBL( +, 1, 0, -, 4 ) )
-        x += 1.0; // double should cover any loss of precision here
+        x += 1.0;   // double should cover any loss of precision here
         // separate x into (1+f) * 2**i
-        union {
-            double d;
-            cl_ulong u;
-        } u;
-        u.d = x;
-        int i = (int)((u.u >> 52) & 0x7ff) - 1023;
+        union{ double d; cl_ulong u;} u;        u.d = x;
+        int i = (int) ((u.u >> 52) & 0x7ff) - 1023;
         u.u &= 0x000fffffffffffffULL;
-        int index = (int)(u.u >> 48);
+        int index = (int) (u.u >> 48 );
         u.u |= 0x3ff0000000000000ULL;
         double f = u.d;
         // further reduce f to be within 1/16 of 1.0
-        static const double scale_table[16] = {
-            1.0,
-            HEX_DBL(+, 1, d2d2d2d6e3f79, -, 1),
-            HEX_DBL(+, 1, b8e38e42737a1, -, 1),
-            HEX_DBL(+, 1, a1af28711adf3, -, 1),
-            HEX_DBL(+, 1, 8cccccd88dd65, -, 1),
-            HEX_DBL(+, 1, 79e79e810ec8f, -, 1),
-            HEX_DBL(+, 1, 68ba2e94df404, -, 1),
-            HEX_DBL(+, 1, 590b216defb29, -, 1),
-            HEX_DBL(+, 1, 4aaaaab1500ed, -, 1),
-            HEX_DBL(+, 1, 3d70a3e0d6f73, -, 1),
-            HEX_DBL(+, 1, 313b13bb39f4f, -, 1),
-            HEX_DBL(+, 1, 25ed09823f1cc, -, 1),
-            HEX_DBL(+, 1, 1b6db6e77457b, -, 1),
-            HEX_DBL(+, 1, 11a7b96a3a34f, -, 1),
-            HEX_DBL(+, 1, 0888888e46fea, -, 1),
-            HEX_DBL(+, 1, 00000038e9862, -, 1)
-        };
+        static const double scale_table[16] = {                  1.0, HEX_DBL( +, 1, d2d2d2d6e3f79, -, 1 ), HEX_DBL( +, 1, b8e38e42737a1, -, 1 ), HEX_DBL( +, 1, a1af28711adf3, -, 1 ),
+                                                HEX_DBL( +, 1, 8cccccd88dd65, -, 1 ), HEX_DBL( +, 1, 79e79e810ec8f, -, 1 ), HEX_DBL( +, 1, 68ba2e94df404, -, 1 ), HEX_DBL( +, 1, 590b216defb29, -, 1 ),
+                                                HEX_DBL( +, 1, 4aaaaab1500ed, -, 1 ), HEX_DBL( +, 1, 3d70a3e0d6f73, -, 1 ), HEX_DBL( +, 1, 313b13bb39f4f, -, 1 ), HEX_DBL( +, 1, 25ed09823f1cc, -, 1 ),
+                                                HEX_DBL( +, 1, 1b6db6e77457b, -, 1 ), HEX_DBL( +, 1, 11a7b96a3a34f, -, 1 ), HEX_DBL( +, 1, 0888888e46fea, -, 1 ), HEX_DBL( +, 1, 00000038e9862, -, 1 ) };
         // correction_table[i] = -log( scale_table[i] )
-        // All entries have >= 64 bits of precision (rather than the expected
-        // 53)
-        static const double correction_table[16] = {
-            -0.0,
-            HEX_DBL(+, 1, 7a5c722c16058, -, 4),
-            HEX_DBL(+, 1, 323db16c89ab1, -, 3),
-            HEX_DBL(+, 1, a0f87d180629, -, 3),
-            HEX_DBL(+, 1, 050279324e17c, -, 2),
-            HEX_DBL(+, 1, 36f885bb270b0, -, 2),
-            HEX_DBL(+, 1, 669b771b5cc69, -, 2),
-            HEX_DBL(+, 1, 94203a6292a05, -, 2),
-            HEX_DBL(+, 1, bfb4f9cb333a4, -, 2),
-            HEX_DBL(+, 1, e982376ddb80e, -, 2),
-            HEX_DBL(+, 1, 08d5d8769b2b2, -, 1),
-            HEX_DBL(+, 1, 1c288bc00e0cf, -, 1),
-            HEX_DBL(+, 1, 2ec7535b31ecb, -, 1),
-            HEX_DBL(+, 1, 40bed0adc63fb, -, 1),
-            HEX_DBL(+, 1, 521a5c0330615, -, 1),
-            HEX_DBL(+, 1, 62e42f7dd092c, -, 1)
-        };
+        // All entries have >= 64 bits of precision (rather than the expected 53)
+        static const double correction_table[16] = {                   -0.0, HEX_DBL( +, 1, 7a5c722c16058, -, 4 ), HEX_DBL( +, 1, 323db16c89ab1, -, 3 ), HEX_DBL( +, 1, a0f87d180629, -, 3 ),
+                                                       HEX_DBL( +, 1, 050279324e17c, -, 2 ), HEX_DBL( +, 1, 36f885bb270b0, -, 2 ), HEX_DBL( +, 1, 669b771b5cc69, -, 2 ), HEX_DBL( +, 1, 94203a6292a05, -, 2 ),
+                                                       HEX_DBL( +, 1, bfb4f9cb333a4, -, 2 ), HEX_DBL( +, 1, e982376ddb80e, -, 2 ), HEX_DBL( +, 1, 08d5d8769b2b2, -, 1 ), HEX_DBL( +, 1, 1c288bc00e0cf, -, 1 ),
+                                                       HEX_DBL( +, 1, 2ec7535b31ecb, -, 1 ), HEX_DBL( +, 1, 40bed0adc63fb, -, 1 ), HEX_DBL( +, 1, 521a5c0330615, -, 1 ), HEX_DBL( +, 1, 62e42f7dd092c, -, 1 ) };
         f *= scale_table[index];
         correctionLo = correction_table[index];
@@ -1804,25 +1605,17 @@
-    // minmax polynomial for p(x) = (log(x+1) - x)/x valid over the range x =
-    // [-1/16, 1/16]
+    // minimax polynomial for p(x) = (log(x+1) - x)/x valid over the range x = [-1/16, 1/16]
     //          max error HEX_DBL( +, 1, 048f61f9a5eca, -, 52 )
-    double p = HEX_DBL(-, 1, cc33de97a9d7b, -, 46)
-        + (HEX_DBL(-, 1, fffffffff3eb7, -, 2)
-           + (HEX_DBL(+, 1, 5555555633ef7, -, 2)
-              + (HEX_DBL(-, 1, 00000062c78, -, 2)
-                 + (HEX_DBL(+, 1, 9999958a3321, -, 3)
-                    + (HEX_DBL(-, 1, 55534ce65c347, -, 3)
-                       + (HEX_DBL(+, 1, 24957208391a5, -, 3)
-                          + (HEX_DBL(-, 1, 02287b9a5b4a1, -, 3)
-                             + HEX_DBL(+, 1, c757d922180ed, -, 4) * x)
-                              * x)
-                           * x)
-                        * x)
-                     * x)
-                  * x)
-               * x)
-            * x;
+    double p = HEX_DBL( -, 1, cc33de97a9d7b,  -, 46 ) +
+               (HEX_DBL( -, 1, fffffffff3eb7, -, 2 ) +
+               (HEX_DBL( +, 1, 5555555633ef7, -, 2 ) +
+               (HEX_DBL( -, 1, 00000062c78,   -, 2 ) +
+               (HEX_DBL( +, 1, 9999958a3321,  -, 3 ) +
+               (HEX_DBL( -, 1, 55534ce65c347, -, 3 ) +
+               (HEX_DBL( +, 1, 24957208391a5, -, 3 ) +
+               (HEX_DBL( -, 1, 02287b9a5b4a1, -, 3 ) +
+                HEX_DBL( +, 1, c757d922180ed, -, 4 ) * x)*x)*x)*x)*x)*x)*x)*x;
     // log(x+1) = x * p(x) + x
     x += x * p;
@@ -1830,23 +1623,22 @@
     return correctionHi + (correctionLo + x);
-double reference_logb(double x)
+double reference_logb( double x )
-    union {
-        float f;
-        cl_uint u;
-    } u;
-    u.f = (float)x;
+    union { float f; cl_uint u;} u;
+    u.f = (float) x;
     cl_int exponent = (u.u >> 23) & 0xff;
-    if (exponent == 0xff) return x * x;
+    if( exponent == 0xff )
+        return x * x;
-    if (exponent == 0)
-    { // deal with denormals
+    if( exponent == 0 )
+    {   // deal with denormals
         u.u = (u.u & 0x007fffff) | 0x3f800000;
         u.f -= 1.0f;
         exponent = (u.u >> 23) & 0xff;
-        if (exponent == 0) return -INFINITY;
+        if( exponent == 0 )
+            return -INFINITY;
         return exponent - (127 + 126);
@@ -1854,271 +1646,219 @@
     return exponent - 127;
-double reference_relaxed_reciprocal(double x) { return 1.0f / ((float)x); }
-double reference_reciprocal(double x) { return 1.0 / x; }
-double reference_remainder(double x, double y)
+double reference_relaxed_reciprocal(double x)
-    int i;
-    return reference_remquo(x, y, &i);
+  return 1.0f / ((float) x);
-double reference_lgamma(double x)
+double reference_reciprocal( double x )
-    /*
-     * ====================================================
-     * This function is from fdlibm.
-     * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
-     *
-     * Developed at SunSoft, a Sun Microsystems, Inc. business.
-     * Permission to use, copy, modify, and distribute this
-     * software is freely granted, provided that this notice
-     * is preserved.
-     * ====================================================
-     *
-     */
+  return 1.0 / x;
-    static const double // two52 = 4.50359962737049600000e+15, /* 0x43300000,
-                        // 0x00000000 */
-        half = 5.00000000000000000000e-01, /* 0x3FE00000,
-                                              0x00000000 */
-        one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
-        pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
-        a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */
-        a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */
-        a2 = 6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */
-        a3 = 2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */
-        a4 = 7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */
-        a5 = 2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */
-        a6 = 1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */
-        a7 = 5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */
-        a8 = 2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */
-        a9 = 1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */
-        a10 = 2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */
-        a11 = 4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */
-        tc = 1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */
-        tf = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */
-        /* tt = -(tail of tf) */
-        tt = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */
-        t0 = 4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */
-        t1 = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */
-        t2 = 6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */
-        t3 = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */
-        t4 = 1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */
-        t5 = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */
-        t6 = 6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */
-        t7 = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */
-        t8 = 2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */
-        t9 = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */
-        t10 = 8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */
-        t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */
-        t12 = 3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */
-        t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */
-        t14 = 3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */
-        u0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
-        u1 = 6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */
-        u2 = 1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */
-        u3 = 9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */
-        u4 = 2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */
-        u5 = 1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */
-        v1 = 2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */
-        v2 = 2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */
-        v3 = 7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */
-        v4 = 1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */
-        v5 = 3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */
-        s0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
-        s1 = 2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */
-        s2 = 3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */
-        s3 = 1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */
-        s4 = 2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */
-        s5 = 1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */
-        s6 = 3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */
-        r1 = 1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */
-        r2 = 7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */
-        r3 = 1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */
-        r4 = 1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */
-        r5 = 7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */
-        r6 = 7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */
-        w0 = 4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */
-        w1 = 8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */
-        w2 = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */
-        w3 = 7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */
-        w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */
-        w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */
-        w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
+double reference_remainder( double x, double y )
+    int i;
+    return reference_remquo( x, y, &i );
-    static const double zero = 0.00000000000000000000e+00;
-    double t, y, z, nadj, p, p1, p2, p3, q, r, w;
-    cl_int i, hx, lx, ix;
+double reference_lgamma( double x)
+ * ====================================================
+ * This function is from fdlibm.
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ *
+ */
-    union {
-        double d;
-        cl_ulong u;
-    } u;
-    u.d = x;
+static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
+                    half=  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
+                    one =  1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
+                    pi  =  3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
+                    a0  =  7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */
+                    a1  =  3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */
+                    a2  =  6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */
+                    a3  =  2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */
+                    a4  =  7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */
+                    a5  =  2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */
+                    a6  =  1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */
+                    a7  =  5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */
+                    a8  =  2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */
+                    a9  =  1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */
+                    a10 =  2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */
+                    a11 =  4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */
+                    tc  =  1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */
+                    tf  = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */
+                    /* tt = -(tail of tf) */
+                    tt  = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */
+                    t0  =  4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */
+                    t1  = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */
+                    t2  =  6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */
+                    t3  = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */
+                    t4  =  1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */
+                    t5  = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */
+                    t6  =  6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */
+                    t7  = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */
+                    t8  =  2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */
+                    t9  = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */
+                    t10 =  8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */
+                    t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */
+                    t12 =  3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */
+                    t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */
+                    t14 =  3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */
+                    u0  = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
+                    u1  =  6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */
+                    u2  =  1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */
+                    u3  =  9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */
+                    u4  =  2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */
+                    u5  =  1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */
+                    v1  =  2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */
+                    v2  =  2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */
+                    v3  =  7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */
+                    v4  =  1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */
+                    v5  =  3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */
+                    s0  = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
+                    s1  =  2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */
+                    s2  =  3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */
+                    s3  =  1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */
+                    s4  =  2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */
+                    s5  =  1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */
+                    s6  =  3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */
+                    r1  =  1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */
+                    r2  =  7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */
+                    r3  =  1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */
+                    r4  =  1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */
+                    r5  =  7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */
+                    r6  =  7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */
+                    w0  =  4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */
+                    w1  =  8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */
+                    w2  = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */
+                    w3  =  7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */
+                    w4  = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */
+                    w5  =  8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */
+                    w6  = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
-    hx = (cl_int)(u.u >> 32);
-    lx = (cl_int)(u.u & 0xffffffffULL);
+    static const double zero=  0.00000000000000000000e+00;
+    double t,y,z,nadj,p,p1,p2,p3,q,r,w;
+    cl_int i,hx,lx,ix;
+    union{ double d; cl_ulong u;}u; u.d = x;
+    hx = (cl_int) (u.u >> 32);
+    lx = (cl_int) (u.u & 0xffffffffULL);
     /* purge off +-inf, NaN, +-0, and negative arguments */
-    //    *signgamp = 1;
-    ix = hx & 0x7fffffff;
-    if (ix >= 0x7ff00000) return x * x;
-    if ((ix | lx) == 0) return INFINITY;
-    if (ix < 0x3b900000)
-    { /* |x|<2**-70, return -log(|x|) */
-        if (hx < 0)
-        {
-            //            *signgamp = -1;
+//    *signgamp = 1;
+    ix = hx&0x7fffffff;
+    if(ix>=0x7ff00000) return x*x;
+    if((ix|lx)==0) return INFINITY;
+    if(ix<0x3b900000) {    /* |x|<2**-70, return -log(|x|) */
+        if(hx<0) {
+//            *signgamp = -1;
             return -reference_log(-x);
-        }
-        else
-            return -reference_log(x);
+        } else return -reference_log(x);
-    if (hx < 0)
-    {
-        if (ix >= 0x43300000) /* |x|>=2**52, must be -integer */
-            return INFINITY;
+    if(hx<0) {
+        if(ix>=0x43300000)     /* |x|>=2**52, must be -integer */
+        return INFINITY;
         t = reference_sinpi(x);
-        if (t == zero) return INFINITY; /* -integer */
-        nadj = reference_log(pi / reference_fabs(t * x));
-        //        if(t<zero) *signgamp = -1;
+        if(t==zero) return INFINITY; /* -integer */
+        nadj = reference_log(pi/reference_fabs(t*x));
+//        if(t<zero) *signgamp = -1;
         x = -x;
     /* purge off 1 and 2 */
-    if ((((ix - 0x3ff00000) | lx) == 0) || (((ix - 0x40000000) | lx) == 0))
-        r = 0;
+    if((((ix-0x3ff00000)|lx)==0)||(((ix-0x40000000)|lx)==0)) r = 0;
     /* for x < 2.0 */
-    else if (ix < 0x40000000)
-    {
-        if (ix <= 0x3feccccc)
-        { /* lgamma(x) = lgamma(x+1)-log(x) */
-            r = -reference_log(x);
-            if (ix >= 0x3FE76944)
-            {
-                y = 1.0 - x;
-                i = 0;
-            }
-            else if (ix >= 0x3FCDA661)
-            {
-                y = x - (tc - one);
-                i = 1;
-            }
-            else
-            {
-                y = x;
-                i = 2;
-            }
+    else if(ix<0x40000000) {
+        if(ix<=0x3feccccc) {     /* lgamma(x) = lgamma(x+1)-log(x) */
+        r = -reference_log(x);
+        if(ix>=0x3FE76944) {y = 1.0-x; i= 0;}
+        else if(ix>=0x3FCDA661) {y= x-(tc-one); i=1;}
+          else {y = x; i=2;}
+        } else {
+          r = zero;
+            if(ix>=0x3FFBB4C3) {y=2.0-x;i=0;} /* [1.7316,2] */
+            else if(ix>=0x3FF3B4C4) {y=x-tc;i=1;} /* [1.23,1.73] */
+        else {y=x-one;i=2;}
-        else
-        {
-            r = zero;
-            if (ix >= 0x3FFBB4C3)
-            {
-                y = 2.0 - x;
-                i = 0;
-            } /* [1.7316,2] */
-            else if (ix >= 0x3FF3B4C4)
-            {
-                y = x - tc;
-                i = 1;
-            } /* [1.23,1.73] */
-            else
-            {
-                y = x - one;
-                i = 2;
-            }
-        }
-        switch (i)
-        {
-            case 0:
-                z = y * y;
-                p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
-                p2 = z
-                    * (a1
-                       + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
-                p = y * p1 + p2;
-                r += (p - 0.5 * y);
-                break;
-            case 1:
-                z = y * y;
-                w = z * y;
-                p1 = t0
-                    + w
-                        * (t3
-                           + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
-                p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
-                p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
-                p = z * p1 - (tt - w * (p2 + y * p3));
-                r += (tf + p);
-                break;
-            case 2:
-                p1 = y
-                    * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
-                p2 = one + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
-                r += (-0.5 * y + p1 / p2);
+        switch(i) {
+          case 0:
+        z = y*y;
+        p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
+        p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
+        p  = y*p1+p2;
+        r  += (p-0.5*y); break;
+          case 1:
+        z = y*y;
+        w = z*y;
+        p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12)));    /* parallel comp */
+        p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
+        p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
+        p  = z*p1-(tt-w*(p2+y*p3));
+        r += (tf + p); break;
+          case 2:
+        p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
+        p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
+        r += (-0.5*y + p1/p2);
-    else if (ix < 0x40200000)
-    { /* x < 8.0 */
+    else if(ix<0x40200000) {             /* x < 8.0 */
         i = (int)x;
         t = zero;
-        y = x - (double)i;
-        p = y
-            * (s0
-               + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
-        q = one + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
-        r = half * y + p / q;
-        z = one; /* lgamma(1+s) = log(s) + lgamma(s) */
-        switch (i)
-        {
-            case 7: z *= (y + 6.0); /* FALLTHRU */
-            case 6: z *= (y + 5.0); /* FALLTHRU */
-            case 5: z *= (y + 4.0); /* FALLTHRU */
-            case 4: z *= (y + 3.0); /* FALLTHRU */
-            case 3:
-                z *= (y + 2.0); /* FALLTHRU */
-                r += reference_log(z);
-                break;
+        y = x-(double)i;
+        p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
+        q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
+        r = half*y+p/q;
+        z = one;    /* lgamma(1+s) = log(s) + lgamma(s) */
+        switch(i) {
+        case 7: z *= (y+6.0);    /* FALLTHRU */
+        case 6: z *= (y+5.0);    /* FALLTHRU */
+        case 5: z *= (y+4.0);    /* FALLTHRU */
+        case 4: z *= (y+3.0);    /* FALLTHRU */
+        case 3: z *= (y+2.0);    /* FALLTHRU */
+            r += reference_log(z); break;
-        /* 8.0 <= x < 2**58 */
-    }
-    else if (ix < 0x43900000)
-    {
+    /* 8.0 <= x < 2**58 */
+    } else if (ix < 0x43900000) {
         t = reference_log(x);
-        z = one / x;
-        y = z * z;
-        w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
-        r = (x - half) * (t - one) + w;
-    }
-    else
-        /* 2**58 <= x <= inf */
-        r = x * (reference_log(x) - one);
-    if (hx < 0) r = nadj - r;
+        z = one/x;
+        y = z*z;
+        w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
+        r = (x-half)*(t-one)+w;
+    } else
+    /* 2**58 <= x <= inf */
+        r =  x*(reference_log(x)-one);
+    if(hx<0) r = nadj - r;
     return r;
 #endif // _MSC_VER
-double reference_assignment(double x) { return x; }
+double reference_assignment( double x ){ return x; }
-int reference_not(double x)
+int reference_not( double x )
-    int r = !x;
-    return r;
+  int r = !x;
+  return r;
 #pragma mark -
 #pragma mark Double testing
 #ifndef M_PIL
-#define M_PIL                                                                  \
-    3.14159265358979323846264338327950288419716939937510582097494459230781640628620899L
+    #define M_PIL        3.14159265358979323846264338327950288419716939937510582097494459230781640628620899L
-static long double reduce1l(long double x);
+static long double reduce1l( long double x );
 #ifdef __PPC__
 // Since long double on PPC is really extended precision double arithmetic
@@ -2127,35 +1867,36 @@
 // such that reduction algorithm used for other architectures will not work.
 // Instead and alternate reduction method is used.
-static long double reduce1l(long double x)
+static long double reduce1l( long double x )
-    union {
-        long double ld;
-        double d[2];
-    } u;
+  union {
+    long double ld;
+    double d[2];
+  } u;
-    // Reduce the high and low halfs separately.
-    u.ld = x;
-    return ((long double)reduce1(u.d[0]) + reduce1(u.d[1]));
+  // Reduce the high and low halfs separately.
+  u.ld = x;
+  return ((long double)reduce1(u.d[0]) + reduce1(u.d[1]));
 #else // !__PPC__
-static long double reduce1l(long double x)
+static long double reduce1l( long double x )
     static long double unit_exp = 0;
-    if (0.0L == unit_exp) unit_exp = scalbnl(1.0L, LDBL_MANT_DIG);
+    if( 0.0L == unit_exp )
+        unit_exp = scalbnl( 1.0L, LDBL_MANT_DIG);
-    if (reference_fabsl(x) >= unit_exp)
+    if( reference_fabsl(x) >= unit_exp )
-        if (reference_fabsl(x) == INFINITY) return cl_make_nan();
+        if( reference_fabsl(x) == INFINITY )
+            return cl_make_nan();
-        return 0.0L; // we patch up the sign for sinPi and cosPi later, since
-                     // they need different signs
+        return 0.0L; //we patch up the sign for sinPi and cosPi later, since they need different signs
     // Find the nearest multiple of 2
-    const long double r = reference_copysignl(unit_exp, x);
+    const long double r = reference_copysignl( unit_exp, x );
     long double z = x + r;
     z -= r;
@@ -2164,31 +1905,19 @@
 #endif // __PPC__
-long double reference_acospil(long double x)
+long double reference_acospil( long double x){  return reference_acosl( x ) / M_PIL;    }
+long double reference_asinpil( long double x){  return reference_asinl( x ) / M_PIL;    }
+long double reference_atanpil( long double x){  return reference_atanl( x ) / M_PIL;    }
+long double reference_atan2pil( long double y, long double x){ return reference_atan2l( y, x) / M_PIL; }
+long double reference_cospil( long double x)
-    return reference_acosl(x) / M_PIL;
-long double reference_asinpil(long double x)
-    return reference_asinl(x) / M_PIL;
-long double reference_atanpil(long double x)
-    return reference_atanl(x) / M_PIL;
-long double reference_atan2pil(long double y, long double x)
-    return reference_atan2l(y, x) / M_PIL;
-long double reference_cospil(long double x)
-    if (reference_fabsl(x) >= HEX_LDBL(+, 1, 0, +, 54))
+    if( reference_fabsl(x) >= HEX_LDBL( +, 1, 0, +, 54 ) )
-        if (reference_fabsl(x) == INFINITY) return cl_make_nan();
+        if( reference_fabsl(x) == INFINITY )
+            return cl_make_nan();
-        // Note this probably fails for odd values between 0x1.0p52 and
-        // 0x1.0p53. However, when starting with single precision inputs, there
-        // will be no odd values.
+        //Note this probably fails for odd values between 0x1.0p52 and 0x1.0p53.
+        //However, when starting with single precision inputs, there will be no odd values.
         return 1.0L;
@@ -2200,9 +1929,9 @@
     // phase adjust
     double xhi = 0.0;
     double xlo = 0.0;
-    xhi = (double)x + 0.5;
+    xhi = (double) x + 0.5;
-    if (reference_fabsl(x) > 0.5L)
+    if(reference_fabsl(x) > 0.5L)
         xlo = xhi - x;
         xlo = 0.5 - xlo;
@@ -2214,69 +1943,61 @@
     // reduce to [-0.5, 0.5]
-    if (xhi < -0.5)
+    if( xhi < -0.5 )
         xhi = -1.0 - xhi;
         xlo = -xlo;
-    else if (xhi > 0.5)
+    else if ( xhi > 0.5 )
         xhi = 1.0 - xhi;
         xlo = -xlo;
     // cosPi zeros are all +0
-    if (xhi == 0.0 && xlo == 0.0) return 0.0;
+    if( xhi == 0.0 && xlo == 0.0 )
+        return 0.0;
     xhi *= M_PI;
     xlo *= M_PI;
     xhi += xlo;
-    return reference_sinl(xhi);
+    return reference_sinl( xhi );
     // phase adjust
     x += 0.5L;
     // reduce to [-0.5, 0.5]
-    if (x < -0.5L)
+    if( x < -0.5L )
         x = -1.0L - x;
-    else if (x > 0.5L)
+    else if ( x > 0.5L )
         x = 1.0L - x;
     // cosPi zeros are all +0
-    if (x == 0.0L) return 0.0L;
+    if( x == 0.0L )
+        return 0.0L;
-    return reference_sinl(x * M_PIL);
+    return reference_sinl( x * M_PIL );
-long double reference_dividel(long double x, long double y)
+long double reference_dividel( long double x, long double y)
     double dx = x;
     double dy = y;
-    return dx / dy;
+    return dx/dy;
-typedef struct
-    double hi, lo;
-} double_double;
+typedef struct{ double hi, lo; } double_double;
-// Split doubles_double into a series of consecutive 26-bit precise doubles and
-// a remainder. Note for later -- for multiplication, it might be better to
-// split each double into a power of two and two 26 bit portions
-//                      multiplication of a double double by a known power of
-//                      two is cheap. The current approach causes some inexact
-//                      arithmetic in mul_dd.
-static inline void split_dd(double_double x, double_double *hi,
-                            double_double *lo)
+// Split doubles_double into a series of consecutive 26-bit precise doubles and a remainder.
+// Note for later -- for multiplication, it might be better to split each double into a power of two and two 26 bit portions
+//                      multiplication of a double double by a known power of two is cheap. The current approach causes some inexact arithmetic in mul_dd.
+static inline void split_dd( double_double x, double_double *hi, double_double *lo )
-    union {
-        double d;
-        cl_ulong u;
-    } u;
+    union{ double d; cl_ulong u;}u;
     u.d = x.hi;
     u.u &= 0xFFFFFFFFF8000000ULL;
     hi->hi = u.d;
@@ -2298,10 +2019,10 @@
     lo->lo = x.hi + x.lo;
-static inline double_double accum_d(double_double a, double b)
+static inline double_double accum_d( double_double a, double b )
     double temp;
-    if (fabs(b) > fabs(a.hi))
+    if( fabs(b) > fabs(a.hi) )
         temp = a.hi;
         a.hi += b;
@@ -2314,45 +2035,47 @@
         a.lo += b - (a.hi - temp);
-    if (isnan(a.lo)) a.lo = 0.0;
+    if( isnan( a.lo ) )
+        a.lo = 0.0;
     return a;
-static inline double_double add_dd(double_double a, double_double b)
+static inline double_double add_dd( double_double a, double_double b )
-    double_double r = { -0.0 - 0.0 };
+    double_double r = {-0.0 -0.0 };
-    if (isinf(a.hi) || isinf(b.hi) || isnan(a.hi) || isnan(b.hi) || 0.0 == a.hi
-        || 0.0 == b.hi)
+    if( isinf(a.hi) || isinf( b.hi )  ||
+       isnan(a.hi) || isnan( b.hi )  ||
+       0.0 == a.hi || 0.0 == b.hi )
         r.hi = a.hi + b.hi;
         r.lo = a.lo + b.lo;
-        if (isnan(r.lo)) r.lo = 0.0;
+        if( isnan( r.lo ) )
+            r.lo = 0.0;
         return r;
-    // merge sort terms by magnitude -- here we assume that |a.hi| > |a.lo|,
-    // |b.hi| > |b.lo|, so we don't have to do the first merge pass
+    //merge sort terms by magnitude -- here we assume that |a.hi| > |a.lo|, |b.hi| > |b.lo|, so we don't have to do the first merge pass
     double terms[4] = { a.hi, b.hi, a.lo, b.lo };
     double temp;
-    // Sort hi terms
-    if (fabs(terms[0]) < fabs(terms[1]))
+    //Sort hi terms
+    if( fabs(terms[0]) < fabs(terms[1]) )
         temp = terms[0];
         terms[0] = terms[1];
         terms[1] = temp;
-    // sort lo terms
-    if (fabs(terms[2]) < fabs(terms[3]))
+    //sort lo terms
+    if( fabs(terms[2]) < fabs(terms[3]) )
         temp = terms[2];
         terms[2] = terms[3];
         terms[3] = temp;
     // Fix case where small high term is less than large low term
-    if (fabs(terms[1]) < fabs(terms[2]))
+    if( fabs(terms[1]) < fabs(terms[2]) )
         temp = terms[1];
         terms[1] = terms[2];
@@ -2375,96 +2098,111 @@
     temp = r.hi;
     r.hi += r.lo;
     r.lo = r.lo - (r.hi - temp);
-    if (isnan(r.lo)) r.lo = 0.0;
+    if( isnan( r.lo ) )
+        r.lo = 0.0;
     return r;
-static inline double_double mul_dd(double_double a, double_double b)
+static inline double_double mul_dd( double_double a, double_double b )
-    double_double result = { -0.0, -0.0 };
+    double_double result = {-0.0,-0.0};
     // Inf, nan and 0
-    if (isnan(a.hi) || isnan(b.hi) || isinf(a.hi) || isinf(b.hi) || 0.0 == a.hi
-        || 0.0 == b.hi)
+    if( isnan( a.hi ) || isnan( b.hi ) ||
+       isinf( a.hi ) || isinf( b.hi ) ||
+       0.0 == a.hi || 0.0 == b.hi )
         result.hi = a.hi * b.hi;
         return result;
     double_double ah, al, bh, bl;
-    split_dd(a, &ah, &al);
-    split_dd(b, &bh, &bl);
+    split_dd( a, &ah, &al );
+    split_dd( b, &bh, &bl );
-    double p0 = ah.hi * bh.hi; // exact    (52 bits in product) 0
-    double p1 = ah.hi * bh.lo; // exact    (52 bits in product) 26
-    double p2 = ah.lo * bh.hi; // exact    (52 bits in product) 26
-    double p3 = ah.lo * bh.lo; // exact    (52 bits in product) 52
-    double p4 = al.hi * bh.hi; // exact    (52 bits in product) 52
-    double p5 = al.hi * bh.lo; // exact    (52 bits in product) 78
-    double p6 = al.lo * bh.hi; // inexact  (54 bits in product) 78
-    double p7 = al.lo * bh.lo; // inexact  (54 bits in product) 104
-    double p8 = ah.hi * bl.hi; // exact    (52 bits in product) 52
-    double p9 = ah.hi * bl.lo; // inexact  (54 bits in product) 78
-    double pA = ah.lo * bl.hi; // exact    (52 bits in product) 78
-    double pB = ah.lo * bl.lo; // inexact  (54 bits in product) 104
-    double pC = al.hi * bl.hi; // exact    (52 bits in product) 104
+    double p0 = ah.hi * bh.hi;        // exact    (52 bits in product) 0
+    double p1 = ah.hi * bh.lo;        // exact    (52 bits in product) 26
+    double p2 = ah.lo * bh.hi;        // exact    (52 bits in product) 26
+    double p3 = ah.lo * bh.lo;        // exact    (52 bits in product) 52
+    double p4 = al.hi * bh.hi;        // exact    (52 bits in product) 52
+    double p5 = al.hi * bh.lo;        // exact    (52 bits in product) 78
+    double p6 = al.lo * bh.hi;        // inexact  (54 bits in product) 78
+    double p7 = al.lo * bh.lo;        // inexact  (54 bits in product) 104
+    double p8 = ah.hi * bl.hi;        // exact    (52 bits in product) 52
+    double p9 = ah.hi * bl.lo;        // inexact  (54 bits in product) 78
+    double pA = ah.lo * bl.hi;        // exact    (52 bits in product) 78
+    double pB = ah.lo * bl.lo;        // inexact  (54 bits in product) 104
+    double pC = al.hi * bl.hi;        // exact    (52 bits in product) 104
     // the last 3 terms are two low to appear in the result
-    // take advantage of the known relative magnitudes of the partial products
-    // to avoid some sorting Combine 2**-78 and 2**-104 terms. Here we are a bit
-    // sloppy about canonicalizing the double_doubles
+    // accumulate from bottom up
+#if 0
+    // works but slow
+    result.hi = pC;
+    result = accum_d( result, pB );
+    result = accum_d( result, p7 );
+    result = accum_d( result, pA );
+    result = accum_d( result, p9 );
+    result = accum_d( result, p6 );
+    result = accum_d( result, p5 );
+    result = accum_d( result, p8 );
+    result = accum_d( result, p4 );
+    result = accum_d( result, p3 );
+    result = accum_d( result, p2 );
+    result = accum_d( result, p1 );
+    result = accum_d( result, p0 );
+    // canonicalize the result
+    double temp = result.hi;
+    result.hi += result.lo;
+    result.lo -= (result.hi - temp);
+    if( isnan( result.lo ) )
+        result.lo = 0.0;
+    return result;
+    // take advantage of the known relative magnitudes of the partial products to avoid some sorting
+    // Combine 2**-78 and 2**-104 terms. Here we are a bit sloppy about canonicalizing the double_doubles
     double_double t0 = { pA, pC };
     double_double t1 = { p9, pB };
     double_double t2 = { p6, p7 };
     double temp0, temp1, temp2;
-    t0 = accum_d(t0, p5); // there is an extra 2**-78 term to deal with
+    t0 = accum_d( t0, p5 );  // there is an extra 2**-78 term to deal with
-    // Add in 2**-52 terms. Here we are a bit sloppy about canonicalizing the
-    // double_doubles
-    temp0 = t0.hi;
-    temp1 = t1.hi;
-    temp2 = t2.hi;
-    t0.hi += p3;
-    t1.hi += p4;
-    t2.hi += p8;
-    temp0 -= t0.hi - p3;
-    temp1 -= t1.hi - p4;
-    temp2 -= t2.hi - p8;
-    t0.lo += temp0;
-    t1.lo += temp1;
-    t2.lo += temp2;
+    // Add in 2**-52 terms. Here we are a bit sloppy about canonicalizing the double_doubles
+    temp0 = t0.hi;      temp1 = t1.hi;      temp2 = t2.hi;
+    t0.hi += p3;        t1.hi += p4;        t2.hi += p8;
+    temp0 -= t0.hi-p3;  temp1 -= t1.hi-p4;  temp2 -= t2.hi - p8;
+    t0.lo += temp0;     t1.lo += temp1;     t2.lo += temp2;
-    // Add in 2**-26 terms. Here we are a bit sloppy about canonicalizing the
-    // double_doubles
-    temp1 = t1.hi;
-    temp2 = t2.hi;
-    t1.hi += p1;
-    t2.hi += p2;
-    temp1 -= t1.hi - p1;
-    temp2 -= t2.hi - p2;
-    t1.lo += temp1;
-    t2.lo += temp2;
+    // Add in 2**-26 terms. Here we are a bit sloppy about canonicalizing the double_doubles
+    temp1 = t1.hi;      temp2 = t2.hi;
+    t1.hi += p1;        t2.hi += p2;
+    temp1 -= t1.hi-p1;  temp2 -= t2.hi - p2;
+    t1.lo += temp1;     t2.lo += temp2;
     // Combine accumulators to get the low bits of result
-    t1 = add_dd(t1, add_dd(t2, t0));
+    t1 = add_dd( t1, add_dd( t2, t0 ) );
     // Add in MSB's, and round to precision
-    return accum_d(t1, p0); // canonicalizes
+    return accum_d( t1, p0 );  // canonicalizes
-long double reference_exp10l(long double z)
+long double reference_exp10l( long double z )
-    const double_double log2_10 = { HEX_DBL(+, 1, a934f0979a371, +, 1),
-                                    HEX_DBL(+, 1, 7f2495fb7fa6d, -, 53) };
+    const double_double log2_10 = { HEX_DBL( +, 1, a934f0979a371, +, 1 ), HEX_DBL( +, 1, 7f2495fb7fa6d, -, 53 ) };
     double_double x;
     int j;
     // Handle NaNs
-    if (isnan(z)) return z;
+    if( isnan(z) )
+        return z;
     // init x
     x.hi = z;
@@ -2473,193 +2211,172 @@
     // 10**x = exp2( x * log2(10) )
-    x = mul_dd(x, log2_10); // x * log2(10)
+    x = mul_dd( x, log2_10);    // x * log2(10)
-    // Deal with overflow and underflow for exp2(x) stage next
-    if (x.hi >= 1025) return INFINITY;
+    //Deal with overflow and underflow for exp2(x) stage next
+    if( x.hi >= 1025 )
+        return INFINITY;
-    if (x.hi < -1075 - 24) return +0.0;
+    if( x.hi < -1075-24 )
+        return +0.0;
     // find nearest integer to x
-    int i = (int)rint(x.hi);
+    int i = (int) rint(x.hi);
     // x now holds fractional part.  The result would be then 2**i  * exp2( x )
     x.hi -= i;
-    // We could attempt to find a minimax polynomial for exp2(x) over the range
-    // x = [-0.5, 0.5]. However, this would converge very slowly near the
-    // extrema, where 0.5**n is not a lot different from 0.5**(n+1), thereby
-    // requiring something like a 20th order polynomial to get 53 + 24 bits of
-    // precision. Instead we further reduce the range to [-1/32, 1/32] by
-    // observing that
+    // We could attempt to find a minimax polynomial for exp2(x) over the range x = [-0.5, 0.5].
+    // However, this would converge very slowly near the extrema, where 0.5**n is not a lot different
+    // from 0.5**(n+1), thereby requiring something like a 20th order polynomial to get 53 + 24 bits
+    // of precision. Instead we further reduce the range to [-1/32, 1/32] by observing that
     //  2**(a+b) = 2**a * 2**b
-    // We can thus build a table of 2**a values for a = n/16, n = [-8, 8], and
-    // reduce the range of x to [-1/32, 1/32] by subtracting away the nearest
-    // value of n/16 from x.
-    const double_double corrections[17] = {
-        { HEX_DBL(+, 1, 6a09e667f3bcd, -, 1),
-          HEX_DBL(-, 1, bdd3413b26456, -, 55) },
-        { HEX_DBL(+, 1, 7a11473eb0187, -, 1),
-          HEX_DBL(-, 1, 41577ee04992f, -, 56) },
-        { HEX_DBL(+, 1, 8ace5422aa0db, -, 1),
-          HEX_DBL(+, 1, 6e9f156864b27, -, 55) },
-        { HEX_DBL(+, 1, 9c49182a3f09, -, 1),
-          HEX_DBL(+, 1, c7c46b071f2be, -, 57) },
-        { HEX_DBL(+, 1, ae89f995ad3ad, -, 1),
-          HEX_DBL(+, 1, 7a1cd345dcc81, -, 55) },
-        { HEX_DBL(+, 1, c199bdd85529c, -, 1),
-          HEX_DBL(+, 1, 11065895048dd, -, 56) },
-        { HEX_DBL(+, 1, d5818dcfba487, -, 1),
-          HEX_DBL(+, 1, 2ed02d75b3707, -, 56) },
-        { HEX_DBL(+, 1, ea4afa2a490da, -, 1),
-          HEX_DBL(-, 1, e9c23179c2893, -, 55) },
-        { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-        { HEX_DBL(+, 1, 0b5586cf9890f, +, 0),
-          HEX_DBL(+, 1, 8a62e4adc610b, -, 54) },
-        { HEX_DBL(+, 1, 172b83c7d517b, +, 0),
-          HEX_DBL(-, 1, 19041b9d78a76, -, 55) },
-        { HEX_DBL(+, 1, 2387a6e756238, +, 0),
-          HEX_DBL(+, 1, 9b07eb6c70573, -, 54) },
-        { HEX_DBL(+, 1, 306fe0a31b715, +, 0),
-          HEX_DBL(+, 1, 6f46ad23182e4, -, 55) },
-        { HEX_DBL(+, 1, 3dea64c123422, +, 0),
-          HEX_DBL(+, 1, ada0911f09ebc, -, 55) },
-        { HEX_DBL(+, 1, 4bfdad5362a27, +, 0),
-          HEX_DBL(+, 1, d4397afec42e2, -, 56) },
-        { HEX_DBL(+, 1, 5ab07dd485429, +, 0),
-          HEX_DBL(+, 1, 6324c054647ad, -, 54) },
-        { HEX_DBL(+, 1, 6a09e667f3bcd, +, 0),
-          HEX_DBL(-, 1, bdd3413b26456, -, 54) }
+    // We can thus build a table of 2**a values for a = n/16, n = [-8, 8], and reduce the range
+    // of x to [-1/32, 1/32] by subtracting away the nearest value of n/16 from x.
+    const double_double corrections[17] =
+    {
+        { HEX_DBL( +, 1, 6a09e667f3bcd, -, 1 ), HEX_DBL( -, 1, bdd3413b26456, -, 55 ) },
+        { HEX_DBL( +, 1, 7a11473eb0187, -, 1 ), HEX_DBL( -, 1, 41577ee04992f, -, 56 ) },
+        { HEX_DBL( +, 1, 8ace5422aa0db, -, 1 ), HEX_DBL( +, 1, 6e9f156864b27, -, 55 ) },
+        { HEX_DBL( +, 1, 9c49182a3f09,  -, 1 ), HEX_DBL( +, 1, c7c46b071f2be, -, 57 ) },
+        { HEX_DBL( +, 1, ae89f995ad3ad, -, 1 ), HEX_DBL( +, 1, 7a1cd345dcc81, -, 55 ) },
+        { HEX_DBL( +, 1, c199bdd85529c, -, 1 ), HEX_DBL( +, 1, 11065895048dd, -, 56 ) },
+        { HEX_DBL( +, 1, d5818dcfba487, -, 1 ), HEX_DBL( +, 1, 2ed02d75b3707, -, 56 ) },
+        { HEX_DBL( +, 1, ea4afa2a490da, -, 1 ), HEX_DBL( -, 1, e9c23179c2893, -, 55 ) },
+        { HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,             +,  0 ) },
+        { HEX_DBL( +, 1, 0b5586cf9890f, +, 0 ), HEX_DBL( +, 1, 8a62e4adc610b, -, 54 ) },
+        { HEX_DBL( +, 1, 172b83c7d517b, +, 0 ), HEX_DBL( -, 1, 19041b9d78a76, -, 55 ) },
+        { HEX_DBL( +, 1, 2387a6e756238, +, 0 ), HEX_DBL( +, 1, 9b07eb6c70573, -, 54 ) },
+        { HEX_DBL( +, 1, 306fe0a31b715, +, 0 ), HEX_DBL( +, 1, 6f46ad23182e4, -, 55 ) },
+        { HEX_DBL( +, 1, 3dea64c123422, +, 0 ), HEX_DBL( +, 1, ada0911f09ebc, -, 55 ) },
+        { HEX_DBL( +, 1, 4bfdad5362a27, +, 0 ), HEX_DBL( +, 1, d4397afec42e2, -, 56 ) },
+        { HEX_DBL( +, 1, 5ab07dd485429, +, 0 ), HEX_DBL( +, 1, 6324c054647ad, -, 54 ) },
+        { HEX_DBL( +, 1, 6a09e667f3bcd, +, 0 ), HEX_DBL( -, 1, bdd3413b26456, -, 54 ) }
-    int index = (int)rint(x.hi * 16.0);
-    x.hi -= (double)index * 0.0625;
+    int index = (int) rint( x.hi * 16.0 );
+    x.hi -= (double) index * 0.0625;
     // canonicalize x
     double temp = x.hi;
     x.hi += x.lo;
     x.lo -= x.hi - temp;
-    // Minimax polynomial for (exp2(x)-1)/x, over the range [-1/32, 1/32].  Max
-    // Error: 2 * 0x1.e112p-87
-    const double_double c[] = { { HEX_DBL(+, 1, 62e42fefa39ef, -, 1),
-                                  HEX_DBL(+, 1, abc9e3ac1d244, -, 56) },
-                                { HEX_DBL(+, 1, ebfbdff82c58f, -, 3),
-                                  HEX_DBL(-, 1, 5e4987a631846, -, 57) },
-                                { HEX_DBL(+, 1, c6b08d704a0c, -, 5),
-                                  HEX_DBL(-, 1, d323200a05713, -, 59) },
-                                { HEX_DBL(+, 1, 3b2ab6fba4e7a, -, 7),
-                                  HEX_DBL(+, 1, c5ee8f8b9f0c1, -, 63) },
-                                { HEX_DBL(+, 1, 5d87fe78a672a, -, 10),
-                                  HEX_DBL(+, 1, 884e5e5cc7ecc, -, 64) },
-                                { HEX_DBL(+, 1, 430912f7e8373, -, 13),
-                                  HEX_DBL(+, 1, 4f1b59514a326, -, 67) },
-                                { HEX_DBL(+, 1, ffcbfc5985e71, -, 17),
-                                  HEX_DBL(-, 1, db7d6a0953b78, -, 71) },
-                                { HEX_DBL(+, 1, 62c150eb16465, -, 20),
-                                  HEX_DBL(+, 1, e0767c2d7abf5, -, 80) },
-                                { HEX_DBL(+, 1, b52502b5e953, -, 24),
-                                  HEX_DBL(+, 1, 6797523f944bc, -, 78) } };
-    size_t count = sizeof(c) / sizeof(c[0]);
+    // Minimax polynomial for (exp2(x)-1)/x, over the range [-1/32, 1/32].  Max Error: 2 * 0x1.e112p-87
+    const double_double c[] = {
+        {HEX_DBL( +, 1, 62e42fefa39ef, -,  1 ), HEX_DBL( +, 1, abc9e3ac1d244, -, 56 )},
+        {HEX_DBL( +, 1, ebfbdff82c58f, -,  3 ), HEX_DBL( -, 1, 5e4987a631846, -, 57 )},
+        {HEX_DBL( +, 1, c6b08d704a0c,  -,  5 ), HEX_DBL( -, 1, d323200a05713, -, 59 )},
+        {HEX_DBL( +, 1, 3b2ab6fba4e7a, -,  7 ), HEX_DBL( +, 1, c5ee8f8b9f0c1, -, 63 )},
+        {HEX_DBL( +, 1, 5d87fe78a672a, -, 10 ), HEX_DBL( +, 1, 884e5e5cc7ecc, -, 64 )},
+        {HEX_DBL( +, 1, 430912f7e8373, -, 13 ), HEX_DBL( +, 1, 4f1b59514a326, -, 67 )},
+        {HEX_DBL( +, 1, ffcbfc5985e71, -, 17 ), HEX_DBL( -, 1, db7d6a0953b78, -, 71 )},
+        {HEX_DBL( +, 1, 62c150eb16465, -, 20 ), HEX_DBL( +, 1, e0767c2d7abf5, -, 80 )},
+        {HEX_DBL( +, 1, b52502b5e953,  -, 24 ), HEX_DBL( +, 1, 6797523f944bc, -, 78 )}
+    };
+    size_t count = sizeof( c ) / sizeof( c[0] );
     // Do polynomial
-    double_double r = c[count - 1];
-    for (j = (int)count - 2; j >= 0; j--) r = add_dd(c[j], mul_dd(r, x));
+    double_double r = c[count-1];
+    for( j = (int) count-2; j >= 0; j-- )
+        r = add_dd( c[j], mul_dd( r, x ) );
     // unwind approximation
-    r = mul_dd(r, x); // before: r =(exp2(x)-1)/x;   after: r = exp2(x) - 1
+    r = mul_dd( r, x );     // before: r =(exp2(x)-1)/x;   after: r = exp2(x) - 1
     // correct for [-0.5, 0.5] -> [-1/32, 1/32] reduction above
     //  exp2(x) = (r + 1) * correction = r * correction + correction
-    r = mul_dd(r, corrections[index + 8]);
-    r = add_dd(r, corrections[index + 8]);
+    r = mul_dd( r, corrections[index+8] );
+    r = add_dd( r, corrections[index+8] );
-    // Format result for output:
+// Format result for output:
     // Get mantissa
-    long double m = ((long double)r.hi + (long double)r.lo);
+    long double m = ((long double) r.hi + (long double) r.lo );
     // Handle a pesky overflow cases when long double = double
-    if (i > 512)
+    if( i > 512 )
-        m *= HEX_DBL(+, 1, 0, +, 512);
+        m *=  HEX_DBL( +, 1, 0, +, 512 );
         i -= 512;
-    else if (i < -512)
+    else if( i < -512 )
-        m *= HEX_DBL(+, 1, 0, -, 512);
+        m *= HEX_DBL( +, 1, 0, -, 512 );
         i += 512;
-    return m * ldexpl(1.0L, i);
+    return m * ldexpl( 1.0L, i );
-static double fallback_frexp(double x, int *iptr)
+static double fallback_frexp( double x, int *iptr )
     cl_ulong u, v;
     double fu, fv;
-    memcpy(&u, &x, sizeof(u));
+    memcpy( &u, &x, sizeof(u));
-    cl_ulong exponent = u & 0x7ff0000000000000ULL;
+    cl_ulong exponent = u &  0x7ff0000000000000ULL;
     cl_ulong mantissa = u & ~0x7ff0000000000000ULL;
     // add 1 to the exponent
     exponent += 0x0010000000000000ULL;
-    if ((cl_long)exponent < (cl_long)0x0020000000000000LL)
+    if( (cl_long) exponent < (cl_long) 0x0020000000000000LL )
     { // subnormal, NaN, Inf
         mantissa |= 0x3fe0000000000000ULL;
         v = mantissa & 0xfff0000000000000ULL;
         u = mantissa;
-        memcpy(&fv, &v, sizeof(v));
-        memcpy(&fu, &u, sizeof(u));
+        memcpy( &fv, &v, sizeof(v));
+        memcpy( &fu, &u, sizeof(u));
         fu -= fv;
-        memcpy(&v, &fv, sizeof(v));
-        memcpy(&u, &fu, sizeof(u));
+        memcpy( &v, &fv, sizeof(v));
+        memcpy( &u, &fu, sizeof(u));
-        exponent = u & 0x7ff0000000000000ULL;
+        exponent = u &  0x7ff0000000000000ULL;
         mantissa = u & ~0x7ff0000000000000ULL;
-        *iptr = (exponent >> 52) + (-1022 + 1 - 1022);
+        *iptr = (exponent >> 52) + (-1022 + 1 -1022);
         u = mantissa | 0x3fe0000000000000ULL;
-        memcpy(&fu, &u, sizeof(u));
+        memcpy( &fu, &u, sizeof(u));
         return fu;
     *iptr = (exponent >> 52) - 1023;
     u = mantissa | 0x3fe0000000000000ULL;
-    memcpy(&fu, &u, sizeof(u));
+    memcpy( &fu, &u, sizeof(u));
     return fu;
 // Assumes zeros, infinities and NaNs handed elsewhere
-static inline int extract(double x, cl_ulong *mant)
+static inline int extract( double x, cl_ulong *mant );
+static inline int extract( double x, cl_ulong *mant )
-    static double (*frexpp)(double, int *) = NULL;
+    static double (*frexpp)(double, int*) = NULL;
     int e;
     // verify that frexp works properly
-    if (NULL == frexpp)
+    if( NULL == frexpp )
-        if (0.5 == frexp(HEX_DBL(+, 1, 0, -, 1030), &e) && e == -1029)
+        if( 0.5 == frexp( HEX_DBL( +, 1, 0, -, 1030 ), &e ) && e == -1029 )
             frexpp = frexp;
             frexpp = fallback_frexp;
-    *mant = (cl_ulong)(HEX_DBL(+, 1, 0, +, 64) * fabs(frexpp(x, &e)));
+    *mant = (cl_ulong) (HEX_DBL( +, 1, 0, +, 64 ) * fabs( frexpp( x, &e )));
     return e - 1;
 // Return 128-bit product of a*b  as (hi << 64) + lo
-static inline void mul128(cl_ulong a, cl_ulong b, cl_ulong *hi, cl_ulong *lo)
+static inline void mul128( cl_ulong a, cl_ulong b, cl_ulong *hi, cl_ulong *lo );
+static inline void mul128( cl_ulong a, cl_ulong b, cl_ulong *hi, cl_ulong *lo )
     cl_ulong alo = a & 0xffffffffULL;
     cl_ulong ahi = a >> 32;
@@ -2670,90 +2387,92 @@
     cl_ulong ahiblo = ahi * blo;
     cl_ulong ahibhi = ahi * bhi;
-    alobhi += (aloblo >> 32)
-        + (ahiblo
-           & 0xffffffffULL); // cannot overflow: (2^32-1)^2 + 2 * (2^32-1)   =
-                             // (2^64 - 2^33 + 1) + (2^33 - 2) = 2^64 - 1
-    *hi = ahibhi + (alobhi >> 32)
-        + (ahiblo >> 32); // cannot overflow: (2^32-1)^2 + 2 * (2^32-1)   =
-                          // (2^64 - 2^33 + 1) + (2^33 - 2) = 2^64 - 1
+    alobhi += (aloblo >> 32) + (ahiblo & 0xffffffffULL);  // cannot overflow: (2^32-1)^2 + 2 * (2^32-1)   = (2^64 - 2^33 + 1) + (2^33 - 2) = 2^64 - 1
+    *hi = ahibhi + (alobhi >> 32) + (ahiblo >> 32);       // cannot overflow: (2^32-1)^2 + 2 * (2^32-1)   = (2^64 - 2^33 + 1) + (2^33 - 2) = 2^64 - 1
     *lo = (aloblo & 0xffffffffULL) | (alobhi << 32);
-static double round_to_nearest_even_double(cl_ulong hi, cl_ulong lo,
-                                           int exponent)
+// Move the most significant non-zero bit to the MSB
+// Note: not general. Only works if the most significant non-zero bit is at MSB-1
+static inline void renormalize( cl_ulong *hi, cl_ulong *lo, int *exponent )
-    union {
-        cl_ulong u;
-        cl_double d;
-    } u;
+    if( 0 == (0x8000000000000000ULL & *hi ))
+    {
+        *hi <<= 1;
+        *hi |= *lo >> 63;
+        *lo <<= 1;
+        *exponent -= 1;
+    }
+static double round_to_nearest_even_double( cl_ulong hi, cl_ulong lo, int exponent );
+static double round_to_nearest_even_double( cl_ulong hi, cl_ulong lo, int exponent )
+    union{ cl_ulong u; cl_double d;} u;
     // edges
-    if (exponent > 1023) return INFINITY;
-    if (exponent == -1075 && (hi | (lo != 0)) > 0x8000000000000000ULL)
-        return HEX_DBL(+, 1, 0, -, 1074);
-    if (exponent <= -1075) return 0.0;
+    if( exponent > 1023 )        return INFINITY;
+    if( exponent == -1075 && (hi | (lo!=0)) > 0x8000000000000000ULL )
+        return HEX_DBL( +, 1, 0, -, 1074 );
+    if( exponent <= -1075 )       return 0.0;
-    // Figure out which bits go where
+    //Figure out which bits go where
     int shift = 11;
-    if (exponent < -1022)
+    if( exponent < -1022 )
-        shift -= 1022 + exponent; // subnormal: shift is not 52
-        exponent = -1023; //              set exponent to 0
+        shift -= 1022 + exponent;               // subnormal: shift is not 52
+        exponent = -1023;                       //              set exponent to 0
-        hi &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove
-                                     // it.
+        hi &= 0x7fffffffffffffffULL;           // normal: leading bit is implicit. Remove it.
     // Assemble the double (round toward zero)
-    u.u = (hi >> shift) | ((cl_ulong)(exponent + 1023) << 52);
+    u.u = (hi >> shift) | ((cl_ulong) (exponent + 1023) << 52);
     // put a representation of the residual bits into hi
-    hi <<= (64 - shift);
+    hi <<= (64-shift);
     hi |= lo >> shift;
-    lo <<= (64 - shift);
+    lo <<= (64-shift );
     hi |= lo != 0;
-    // round to nearest, ties to even
-    if (hi < 0x8000000000000000ULL) return u.d;
-    if (hi == 0x8000000000000000ULL)
-        u.u += u.u & 1ULL;
-    else
-        u.u++;
+    //round to nearest, ties to even
+    if( hi < 0x8000000000000000ULL )    return u.d;
+    if( hi == 0x8000000000000000ULL )   u.u += u.u & 1ULL;
+    else                                u.u++;
     return u.d;
-// Shift right.  Bits lost on the right will be OR'd together and OR'd with the
-// LSB
-static inline void shift_right_sticky_128(cl_ulong *hi, cl_ulong *lo, int shift)
+// Shift right.  Bits lost on the right will be OR'd together and OR'd with the LSB
+static inline void shift_right_sticky_128( cl_ulong *hi, cl_ulong *lo, int shift );
+static inline void shift_right_sticky_128( cl_ulong *hi, cl_ulong *lo, int shift )
     cl_ulong sticky = 0;
     cl_ulong h = *hi;
     cl_ulong l = *lo;
-    if (shift >= 64)
+    if( shift >= 64 )
         shift -= 64;
         sticky = 0 != lo;
         l = h;
         h = 0;
-        if (shift >= 64)
+        if( shift >= 64 )
             sticky |= (0 != l);
             l = 0;
-            sticky |= (0 != (l << (64 - shift)));
+            sticky |= (0 != (l << (64-shift)));
             l >>= shift;
-        sticky |= (0 != (l << (64 - shift)));
+        sticky |= (0 != (l << (64-shift)));
         l >>= shift;
-        l |= h << (64 - shift);
+        l |=  h << (64-shift);
         h >>= shift;
@@ -2762,10 +2481,9 @@
 // 128-bit add  of ((*hi << 64) + *lo) + ((chi << 64) + clo)
-// If the 129 bit result doesn't fit, bits lost off the right end will be OR'd
-// with the LSB
-static inline void add128(cl_ulong *hi, cl_ulong *lo, cl_ulong chi,
-                          cl_ulong clo, int *exponent)
+// If the 129 bit result doesn't fit, bits lost off the right end will be OR'd with the LSB
+static inline void add128( cl_ulong *hi, cl_ulong *lo, cl_ulong chi, cl_ulong clo, int *exp );
+static inline void add128( cl_ulong *hi, cl_ulong *lo, cl_ulong chi, cl_ulong clo, int *exponent )
     cl_ulong carry, carry2;
     // extended precision add
@@ -2773,16 +2491,15 @@
     chi = add_carry(*hi, chi, &carry2);
     chi = add_carry(chi, carry, &carry);
-    // If we overflowed the 128 bit result
-    if (carry || carry2)
+    //If we overflowed the 128 bit result
+    if( carry || carry2 )
-        carry = clo & 1; // set aside low bit
-        clo >>= 1; // right shift low 1
-        clo |= carry; // or back in the low bit, so we don't come to believe
-                      // this is an exact half way case for rounding
-        clo |= chi << 63; // move lowest high bit into highest bit of lo
-        chi >>= 1; // right shift hi
-        chi |= 0x8000000000000000ULL; // move the carry bit into hi.
+        carry = clo & 1;                        // set aside low bit
+        clo >>= 1;                              // right shift low 1
+        clo |= carry;                           // or back in the low bit, so we don't come to believe this is an exact half way case for rounding
+        clo |= chi << 63;                       // move lowest high bit into highest bit of lo
+        chi >>= 1;                              // right shift hi
+        chi |= 0x8000000000000000ULL;           // move the carry bit into hi.
         *exponent = *exponent + 1;
@@ -2791,49 +2508,48 @@
 // 128-bit subtract  of ((chi << 64) + clo)  - ((*hi << 64) + *lo)
-static inline void sub128(cl_ulong *chi, cl_ulong *clo, cl_ulong hi,
-                          cl_ulong lo, cl_ulong *signC, int *expC)
+static inline void sub128( cl_ulong *chi, cl_ulong *clo, cl_ulong hi, cl_ulong lo, cl_ulong *signC, int *expC );
+static inline void sub128( cl_ulong *chi, cl_ulong *clo, cl_ulong hi, cl_ulong lo, cl_ulong *signC, int *expC )
     cl_ulong rHi = *chi;
     cl_ulong rLo = *clo;
     cl_ulong carry, carry2;
-    // extended precision subtract
+    //extended precision subtract
     rLo = sub_carry(rLo, lo, &carry);
     rHi = sub_carry(rHi, hi, &carry2);
     rHi = sub_carry(rHi, carry, &carry);
     // Check for sign flip
-    if (carry || carry2)
+    if( carry || carry2 )
         *signC ^= 0x8000000000000000ULL;
-        // negate rLo, rHi:   -x = (x ^ -1) + 1
+        //negate rLo, rHi:   -x = (x ^ -1) + 1
         rLo ^= -1ULL;
         rHi ^= -1ULL;
         rHi += 0 == rLo;
-    // normalize -- move the most significant non-zero bit to the MSB, and
-    // adjust exponent accordingly
-    if (rHi == 0)
+    // normalize -- move the most significant non-zero bit to the MSB, and adjust exponent accordingly
+    if( rHi == 0 )
         rHi = rLo;
         *expC = *expC - 64;
         rLo = 0;
-    if (rHi)
+    if( rHi )
         int shift = 32;
         cl_ulong test = 1ULL << 32;
-        while (0 == (rHi & 0x8000000000000000ULL))
+        while( 0 == (rHi & 0x8000000000000000ULL))
-            if (rHi < test)
+            if( rHi < test )
                 rHi <<= shift;
-                rHi |= rLo >> (64 - shift);
+                rHi |= rLo >> (64-shift);
                 rLo <<= shift;
                 *expC = *expC - shift;
@@ -2843,7 +2559,7 @@
-        // zero
+        //zero
         *expC = INT_MIN;
         *signC = 0;
@@ -2853,7 +2569,7 @@
     *clo = rLo;
-long double reference_fmal(long double x, long double y, long double z)
+long double reference_fmal( long double x, long double y, long double z)
     static const cl_ulong kMSB = 0x8000000000000000ULL;
@@ -2863,91 +2579,75 @@
     double c = z;
     // Make bits accessible
-    union {
-        cl_ulong u;
-        cl_double d;
-    } ua;
-    ua.d = a;
-    union {
-        cl_ulong u;
-        cl_double d;
-    } ub;
-    ub.d = b;
-    union {
-        cl_ulong u;
-        cl_double d;
-    } uc;
-    uc.d = c;
+    union{ cl_ulong u; cl_double d; } ua; ua.d = a;
+    union{ cl_ulong u; cl_double d; } ub; ub.d = b;
+    union{ cl_ulong u; cl_double d; } uc; uc.d = c;
     // deal with Nans, infinities and zeros
-    if (isnan(a) || isnan(b) || isnan(c) || isinf(a) || isinf(b) || isinf(c)
-        || 0 == (ua.u & ~kMSB) || // a == 0, defeat host FTZ behavior
-        0 == (ub.u & ~kMSB) || // b == 0, defeat host FTZ behavior
-        0 == (uc.u & ~kMSB)) // c == 0, defeat host FTZ behavior
+    if( isnan( a ) || isnan( b ) || isnan(c)    ||
+        isinf( a ) || isinf( b ) || isinf(c)    ||
+        0 == ( ua.u & ~kMSB)                ||  // a == 0, defeat host FTZ behavior
+        0 == ( ub.u & ~kMSB)                ||  // b == 0, defeat host FTZ behavior
+        0 == ( uc.u & ~kMSB)                )   // c == 0, defeat host FTZ behavior
-        if (isinf(c) && !isinf(a) && !isinf(b)) return (c + a) + b;
+        if( isinf( c ) && !isinf(a) && !isinf(b) )
+            return (c + a) + b;
-        a = (double)reference_multiplyl(
-            a, b); // some risk that the compiler will insert a non-compliant
-                   // fma here on some platforms.
-        return reference_addl(
-            a,
-            c); // We use STDC FP_CONTRACT OFF above to attempt to defeat that.
+        a = (double) reference_multiplyl( a, b );   // some risk that the compiler will insert a non-compliant fma here on some platforms.
+        return reference_addl(a, c);                // We use STDC FP_CONTRACT OFF above to attempt to defeat that.
     // extract exponent and mantissa
     //   exponent is a standard unbiased signed integer
     //   mantissa is a cl_uint, with leading non-zero bit positioned at the MSB
     cl_ulong mantA, mantB, mantC;
-    int expA = extract(a, &mantA);
-    int expB = extract(b, &mantB);
-    int expC = extract(c, &mantC);
-    cl_ulong signC = uc.u & kMSB; // We'll need the sign bit of C later to
-                                  // decide if we are adding or subtracting
+    int expA = extract( a, &mantA );
+    int expB = extract( b, &mantB );
+    int expC = extract( c, &mantC );
+    cl_ulong signC = uc.u & kMSB;               // We'll need the sign bit of C later to decide if we are adding or subtracting
-    // exact product of A and B
+// exact product of A and B
     int exponent = expA + expB;
     cl_ulong sign = (ua.u ^ ub.u) & kMSB;
     cl_ulong hi, lo;
-    mul128(mantA, mantB, &hi, &lo);
+    mul128( mantA, mantB, &hi, &lo );
     // renormalize
-    if (0 == (kMSB & hi))
+    if( 0 == (kMSB & hi) )
         hi <<= 1;
         hi |= lo >> 63;
         lo <<= 1;
-        exponent++; // 2**63 * 2**63 gives 2**126. If the MSB was set, then our
-                    // exponent increased.
+        exponent++;         // 2**63 * 2**63 gives 2**126. If the MSB was set, then our exponent increased.
-    // infinite precision add
+//infinite precision add
     cl_ulong chi = mantC;
     cl_ulong clo = 0;
-    if (exponent >= expC)
+    if( exponent >= expC )
         // Normalize C relative to the product
-        if (exponent > expC)
-            shift_right_sticky_128(&chi, &clo, exponent - expC);
+        if( exponent > expC )
+            shift_right_sticky_128( &chi, &clo, exponent - expC );
         // Add
-        if (sign ^ signC)
-            sub128(&hi, &lo, chi, clo, &sign, &exponent);
+        if( sign ^ signC )
+            sub128( &hi, &lo, chi, clo, &sign, &exponent );
-            add128(&hi, &lo, chi, clo, &exponent);
+            add128( &hi, &lo, chi, clo, &exponent );
         // Shift the product relative to C so that their exponents match
-        shift_right_sticky_128(&hi, &lo, expC - exponent);
+        shift_right_sticky_128( &hi, &lo, expC - exponent );
         // add
-        if (sign ^ signC)
-            sub128(&chi, &clo, hi, lo, &signC, &expC);
+        if( sign ^ signC )
+            sub128( &chi, &clo, hi, lo, &signC, &expC );
-            add128(&chi, &clo, hi, lo, &expC);
+            add128( &chi, &clo, hi, lo, &expC );
         hi = chi;
         lo = clo;
@@ -2965,49 +2665,61 @@
-long double reference_madl(long double a, long double b, long double c)
+long double reference_madl( long double a, long double b, long double c) { return a * b + c; }
+//long double my_nextafterl(long double x, long double y){  return (long double) nextafter( (double) x, (double) y ); }
+long double reference_recipl( long double x){ return 1.0L / x; }
+long double reference_rootnl( long double x, int i)
-    return a * b + c;
+    double hi,  lo;
+    long double l;
+    //rootn ( x, 0 )  returns a NaN.
+    if( 0 == i )
+        return cl_make_nan();
-long double reference_recipl(long double x) { return 1.0L / x; }
+    //rootn ( x, n )  returns a NaN for x < 0 and n is even.
+    if( x < 0.0L && 0 == (i&1) )
+        return cl_make_nan();
-long double reference_rootnl(long double x, int i)
-    // rootn ( x, 0 )  returns a NaN.
-    if (0 == i) return cl_make_nan();
-    // rootn ( x, n )  returns a NaN for x < 0 and n is even.
-    if (x < 0.0L && 0 == (i & 1)) return cl_make_nan();
-    if (isinf(x))
+    if( isinf(x) )
-        if (i < 0) return reference_copysignl(0.0L, x);
+        if( i < 0 )
+            return reference_copysignl(0.0L, x);
         return x;
-    if (x == 0.0)
+    if( x == 0.0 )
-        switch (i & 0x80000001)
+        switch( i & 0x80000001 )
-            // rootn ( +-0,  n ) is +0 for even n > 0.
-            case 0: return 0.0L;
+            //rootn ( +-0,  n ) is +0 for even n > 0.
+            case 0:
+                return 0.0L;
-            // rootn ( +-0,  n ) is +-0 for odd n > 0.
-            case 1: return x;
+            //rootn ( +-0,  n ) is +-0 for odd n > 0.
+            case 1:
+                return x;
-            // rootn ( +-0,  n ) is +inf for even n < 0.
-            case 0x80000000: return INFINITY;
+            //rootn ( +-0,  n ) is +inf for even n < 0.
+            case 0x80000000:
+                return INFINITY;
-            // rootn ( +-0,  n ) is +-inf for odd n < 0.
-            case 0x80000001: return copysign(INFINITY, x);
+            //rootn ( +-0,  n ) is +-inf for odd n < 0.
+            case 0x80000001:
+                return copysign(INFINITY, x);
-    if (i == 1) return x;
+    if( i == 1 )
+        return x;
-    if (i == -1) return 1.0 / x;
+    if( i == -1 )
+        return 1.0 / x;
     long double sign = x;
     x = reference_fabsl(x);
@@ -3015,267 +2727,261 @@
     DivideDD(&iHi, &iLo, 1.0, i);
     x = reference_powl(x, iHi) * reference_powl(x, iLo);
-    return reference_copysignl(x, sign);
+    return reference_copysignl( x, sign );
-long double reference_rsqrtl(long double x) { return 1.0L / sqrtl(x); }
-long double reference_sinpil(long double x)
+long double reference_rsqrtl( long double x){ return 1.0L / sqrtl(x); }
+//long double reference_sincosl( long double x, long double *c ){ *c = reference_cosl(x); return reference_sinl(x); }
+long double reference_sinpil( long double x)
     double r = reduce1l(x);
     // reduce to [-0.5, 0.5]
-    if (r < -0.5L)
+    if( r < -0.5L )
         r = -1.0L - r;
-    else if (r > 0.5L)
+    else if ( r > 0.5L )
         r = 1.0L - r;
     // sinPi zeros have the same sign as x
-    if (r == 0.0L) return reference_copysignl(0.0L, x);
+    if( r == 0.0L )
+        return reference_copysignl(0.0L, x);
-    return reference_sinl(r * M_PIL);
+    return reference_sinl( r * M_PIL );
-long double reference_tanpil(long double x)
+long double reference_tanpil( long double x)
     // set aside the sign  (allows us to preserve sign of -0)
-    long double sign = reference_copysignl(1.0L, x);
+    long double sign = reference_copysignl( 1.0L, x);
     long double z = reference_fabsl(x);
     // if big and even  -- caution: only works if x only has single precision
-    if (z >= HEX_LDBL(+, 1, 0, +, 53))
+    if( z >= HEX_LDBL( +, 1, 0, +, 53 ) )
-        if (z == INFINITY) return x - x; // nan
+        if( z == INFINITY )
+            return x - x;       // nan
-        return reference_copysignl(
-            0.0L, x); // tanpi ( n ) is copysign( 0.0, n)  for even integers n.
+        return reference_copysignl( 0.0L, x);   // tanpi ( n ) is copysign( 0.0, n)  for even integers n.
     // reduce to the range [ -0.5, 0.5 ]
-    long double nearest =
-        reference_rintl(z); // round to nearest even places n + 0.5 values in
-                            // the right place for us
-    int64_t i =
-        (int64_t)nearest; // test above against 0x1.0p53 avoids overflow here
+    long double nearest = reference_rintl( z );     // round to nearest even places n + 0.5 values in the right place for us
+    int64_t i = (int64_t) nearest;          // test above against 0x1.0p53 avoids overflow here
     z -= nearest;
-    // correction for odd integer x for the right sign of zero
-    if ((i & 1) && z == 0.0L) sign = -sign;
+    //correction for odd integer x for the right sign of zero
+    if( (i&1) && z == 0.0L )
+        sign = -sign;
     // track changes to the sign
-    sign *= reference_copysignl(1.0L, z); // really should just be an xor
-    z = reference_fabsl(z); // remove the sign again
+    sign *= reference_copysignl(1.0L, z);       // really should just be an xor
+    z = reference_fabsl(z);                    // remove the sign again
     // reduce once more
-    // If we don't do this, rounding error in z * M_PI will cause us not to
-    // return infinities properly
-    if (z > 0.25L)
+    // If we don't do this, rounding error in z * M_PI will cause us not to return infinities properly
+    if( z > 0.25L )
         z = 0.5L - z;
-        return sign
-            / reference_tanl(z
-                             * M_PIL); // use system tan to get the right result
+        return sign / reference_tanl( z * M_PIL );      // use system tan to get the right result
-    return sign
-        * reference_tanl(z * M_PIL); // use system tan to get the right result
+    return sign * reference_tanl( z * M_PIL );          // use system tan to get the right result
-long double reference_pownl(long double x, int i)
+long double reference_pownl( long double x, int i ){ return reference_powl( x, (long double) i ); }
+long double reference_powrl( long double x, long double y )
-    return reference_powl(x, (long double)i);
+    //powr ( x, y ) returns NaN for x < 0.
+    if( x < 0.0L )
+        return cl_make_nan();
-long double reference_powrl(long double x, long double y)
-    // powr ( x, y ) returns NaN for x < 0.
-    if (x < 0.0L) return cl_make_nan();
+    //powr ( x, NaN ) returns the NaN for x >= 0.
+    //powr ( NaN, y ) returns the NaN.
+    if( isnan(x) || isnan(y) )
+        return x + y;   // Note: behavior different here than for pow(1,NaN), pow(NaN, 0)
-    // powr ( x, NaN ) returns the NaN for x >= 0.
-    // powr ( NaN, y ) returns the NaN.
-    if (isnan(x) || isnan(y))
-        return x + y; // Note: behavior different here than for pow(1,NaN),
-                      // pow(NaN, 0)
-    if (x == 1.0L)
+    if( x == 1.0L )
-        // powr ( +1, +-inf ) returns NaN.
-        if (reference_fabsl(y) == INFINITY) return cl_make_nan();
+        //powr ( +1, +-inf ) returns NaN.
+        if( reference_fabsl(y) == INFINITY )
+            return cl_make_nan();
-        // powr ( +1, y ) is 1 for finite y.    (NaN handled above)
+        //powr ( +1, y ) is 1 for finite y.    (NaN handled above)
         return 1.0L;
-    if (y == 0.0L)
+    if( y == 0.0L )
-        // powr ( +inf, +-0 ) returns NaN.
-        // powr ( +-0, +-0 ) returns NaN.
-        if (x == 0.0L || x == INFINITY) return cl_make_nan();
+        //powr ( +inf, +-0 ) returns NaN.
+        //powr ( +-0, +-0 ) returns NaN.
+        if( x == 0.0L || x == INFINITY )
+            return cl_make_nan();
-        // powr ( x, +-0 ) is 1 for finite x > 0.  (x <= 0, NaN, INF already
-        // handled above)
+        //powr ( x, +-0 ) is 1 for finite x > 0.  (x <= 0, NaN, INF already handled above)
         return 1.0L;
-    if (x == 0.0L)
+    if( x == 0.0L )
-        // powr ( +-0, -inf) is +inf.
-        // powr ( +-0, y ) is +inf for finite y < 0.
-        if (y < 0.0L) return INFINITY;
+        //powr ( +-0, -inf) is +inf.
+        //powr ( +-0, y ) is +inf for finite y < 0.
+        if( y < 0.0L )
+            return INFINITY;
-        // powr ( +-0, y ) is +0 for y > 0.    (NaN, y==0 handled above)
+        //powr ( +-0, y ) is +0 for y > 0.    (NaN, y==0 handled above)
         return 0.0L;
-    return reference_powl(x, y);
+    return reference_powl( x, y );
-long double reference_addl(long double x, long double y)
+//long double my_fdiml( long double x, long double y){ return fdim( (double) x, (double) y ); }
+long double reference_addl( long double x, long double y)
-    volatile double a = (double)x;
-    volatile double b = (double)y;
+    volatile double a = (double) x;
+    volatile double b = (double) y;
-#if defined(__SSE2__)
+#if defined( __SSE2__ )
     // defeat x87
-    __m128d va = _mm_set_sd((double)a);
-    __m128d vb = _mm_set_sd((double)b);
-    va = _mm_add_sd(va, vb);
-    _mm_store_sd((double *)&a, va);
+    __m128d va = _mm_set_sd( (double) a );
+    __m128d vb = _mm_set_sd( (double) b );
+    va = _mm_add_sd( va, vb );
+    _mm_store_sd( (double*) &a, va );
     a += b;
-    return (long double)a;
+    return (long double) a;
-long double reference_subtractl(long double x, long double y)
+long double reference_subtractl( long double x, long double y)
-    volatile double a = (double)x;
-    volatile double b = (double)y;
+    volatile double a = (double) x;
+    volatile double b = (double) y;
-#if defined(__SSE2__)
+#if defined( __SSE2__ )
     // defeat x87
-    __m128d va = _mm_set_sd((double)a);
-    __m128d vb = _mm_set_sd((double)b);
-    va = _mm_sub_sd(va, vb);
-    _mm_store_sd((double *)&a, va);
+    __m128d va = _mm_set_sd( (double) a );
+    __m128d vb = _mm_set_sd( (double) b );
+    va = _mm_sub_sd( va, vb );
+    _mm_store_sd( (double*) &a, va );
     a -= b;
-    return (long double)a;
+    return (long double) a;
-long double reference_multiplyl(long double x, long double y)
+long double reference_multiplyl( long double x, long double y)
-    volatile double a = (double)x;
-    volatile double b = (double)y;
+    volatile double a = (double) x;
+    volatile double b = (double) y;
-#if defined(__SSE2__)
+#if defined( __SSE2__ )
     // defeat x87
-    __m128d va = _mm_set_sd((double)a);
-    __m128d vb = _mm_set_sd((double)b);
-    va = _mm_mul_sd(va, vb);
-    _mm_store_sd((double *)&a, va);
+    __m128d va = _mm_set_sd( (double) a );
+    __m128d vb = _mm_set_sd( (double) b );
+    va = _mm_mul_sd( va, vb );
+    _mm_store_sd( (double*) &a, va );
     a *= b;
-    return (long double)a;
+    return (long double) a;
-long double reference_lgamma_rl(long double x, int *signp)
+/*long double my_remquol( long double x, long double y, int *iptr )
+    if( isnan(x) || isnan(y) ||
+        fabs(x) == INFINITY  ||
+        y == 0.0 )
+    {
+        *iptr = 0;
+        return NAN;
+    }
+    return remquo( (double) x, (double) y, iptr );
+long double reference_lgamma_rl( long double x, int *signp )
+//    long double lgamma_val = (long double)reference_lgamma( (double)x );
+//    *signp = signgam;
     *signp = 0;
     return x;
-int reference_isequall(long double x, long double y) { return x == y; }
-int reference_isfinitel(long double x) { return 0 != isfinite(x); }
-int reference_isgreaterl(long double x, long double y) { return x > y; }
-int reference_isgreaterequall(long double x, long double y) { return x >= y; }
-int reference_isinfl(long double x) { return 0 != isinf(x); }
-int reference_islessl(long double x, long double y) { return x < y; }
-int reference_islessequall(long double x, long double y) { return x <= y; }
-#if defined(__INTEL_COMPILER)
-int reference_islessgreaterl(long double x, long double y)
-    return 0 != islessgreaterl(x, y);
+int reference_isequall( long double x, long double y){ return x == y; }
+int reference_isfinitel( long double x){ return 0 != isfinite(x); }
+int reference_isgreaterl( long double x, long double y){ return x > y; }
+int reference_isgreaterequall( long double x, long double y){ return x >= y; }
+int reference_isinfl( long double x){ return 0 != isinf(x); }
+int reference_islessl( long double x, long double y){ return x < y; }
+int reference_islessequall( long double x, long double y){ return x <= y; }
+int reference_islessgreaterl( long double x, long double y){  return 0 != islessgreater( x, y ); }
+int reference_isnanl( long double x){ return 0 != isnan( x ); }
+int reference_isnormall( long double x){ return 0 != isnormal( (double) x ); }
+int reference_isnotequall( long double x, long double y){ return x != y; }
+int reference_isorderedl( long double x, long double y){ return x == x && y == y; }
+int reference_isunorderedl( long double x, long double y){ return isnan(x) || isnan( y ); }
+#if defined( __INTEL_COMPILER )
+int reference_signbitl( long double x){ return 0 != signbitl( x ); }
-int reference_islessgreaterl(long double x, long double y)
-    return 0 != islessgreater(x, y);
+int reference_signbitl( long double x){ return 0 != signbit( x ); }
-int reference_isnanl(long double x) { return 0 != isnan(x); }
-int reference_isnormall(long double x) { return 0 != isnormal((double)x); }
-int reference_isnotequall(long double x, long double y) { return x != y; }
-int reference_isorderedl(long double x, long double y)
-    return x == x && y == y;
-int reference_isunorderedl(long double x, long double y)
-    return isnan(x) || isnan(y);
-#if defined(__INTEL_COMPILER)
-int reference_signbitl(long double x) { return 0 != signbitl(x); }
-int reference_signbitl(long double x) { return 0 != signbit(x); }
-long double reference_copysignl(long double x, long double y);
-long double reference_roundl(long double x);
+long double reference_copysignl( long double x, long double y);
+long double reference_roundl( long double x );
 long double reference_cbrtl(long double x);
-long double reference_copysignl(long double x, long double y)
+long double reference_copysignl( long double x, long double y )
-    // We hope that the long double to double conversion proceeds with sign
-    // fidelity, even for zeros and NaNs
-    union {
-        double d;
-        cl_ulong u;
-    } u;
-    u.d = (double)y;
+    // We hope that the long double to double conversion proceeds with sign fidelity,
+    // even for zeros and NaNs
+    union{ double d; cl_ulong u;}u; u.d = (double) y;
     x = reference_fabsl(x);
-    if (u.u >> 63) x = -x;
+    if( u.u >> 63 )
+        x = -x;
     return x;
-long double reference_roundl(long double x)
+long double reference_roundl( long double x )
     // Since we are just using this to verify double precision, we can
     // use the double precision copysign here
 #if defined(__MINGW32__) && defined(__x86_64__)
     long double absx = reference_fabsl(x);
-    if (absx < 0.5L) return reference_copysignl(0.0L, x);
+    if (absx < 0.5L)
+    return reference_copysignl(0.0L, x);
-    return round((double)x);
+    return round( (double) x );
-long double reference_truncl(long double x)
+long double reference_truncl( long double x )
     // Since we are just using this to verify double precision, we can
     // use the double precision copysign here
-    return trunc((double)x);
+    return trunc( (double) x );
 static long double reference_scalblnl(long double x, long n);
 long double reference_cbrtl(long double x)
-    double yhi = HEX_DBL(+, 1, 5555555555555, -, 2);
-    double ylo = HEX_DBL(+, 1, 558, -, 56);
+    double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 );
+    double ylo = HEX_DBL( +, 1, 558, -, 56 );
-    double fabsx = reference_fabs(x);
+    double fabsx = reference_fabs( x );
-    if (isnan(x) || fabsx == 1.0 || fabsx == 0.0 || isinf(x)) return x;
+    if( isnan(x) || fabsx == 1.0 || fabsx == 0.0 || isinf(x) )
+        return x;
+    double iy = 0.0;
     double log2x_hi, log2x_lo;
-    // extended precision log .... accurate to at least 64-bits + couple of
-    // guard bits
+    // extended precision log .... accurate to at least 64-bits + couple of guard bits
     __log2_ep(&log2x_hi, &log2x_lo, fabsx);
     double ylog2x_hi, ylog2x_lo;
@@ -3287,44 +2993,79 @@
     MulDD(&ylog2x_hi, &ylog2x_lo, log2x_hi, log2x_lo, y_hi, y_lo);
     long double powxy;
-    if (isinf(ylog2x_hi) || (reference_fabs(ylog2x_hi) > 2200))
-    {
-        powxy =
-            reference_signbit(ylog2x_hi) ? HEX_DBL(+, 0, 0, +, 0) : INFINITY;
-    }
-    else
-    {
+    if(isinf(ylog2x_hi) || (reference_fabs(ylog2x_hi) > 2200)) {
+        powxy = reference_signbit(ylog2x_hi) ? HEX_DBL( +, 0, 0, +, 0 ) : INFINITY;
+    } else {
         // separate integer + fractional part
         long int m = lrint(ylog2x_hi);
         AddDD(&ylog2x_hi, &ylog2x_lo, ylog2x_hi, ylog2x_lo, -m, 0.0);
         // revert to long double arithemtic
-        long double ylog2x = (long double)ylog2x_hi + (long double)ylog2x_lo;
-        powxy = reference_exp2l(ylog2x);
+        long double ylog2x = (long double) ylog2x_hi + (long double) ylog2x_lo;
+        powxy = reference_exp2l( ylog2x );
         powxy = reference_scalblnl(powxy, m);
-    return reference_copysignl(powxy, x);
+    return reference_copysignl( powxy, x );
-long double reference_rintl(long double x)
+long double scalbnl( long double x, int i )
+    //suitable for checking double precision scalbn only
+    if( i > 3000 )
+        return copysignl( INFINITY, x);
+    if( i < -3000 )
+        return copysignl( 0.0L, x);
+    if( i > 0 )
+    {
+        while( i >= 1000 )
+        {
+            x *= HEX_LDBL( +, 1, 0, +, 1000 );
+            i -= 1000;
+        }
+        union{ cl_ulong u; double d;}u;
+        u.u = (cl_ulong)( i + 1023 ) << 52;
+        x *= (long double) u.d;
+    }
+    else if( i < 0 )
+    {
+        while( i <= -1000 )
+        {
+            x *= HEX_LDBL( +, 1, 0, -, 1000 );
+            i += 1000;
+        }
+        union{ cl_ulong u; double d;}u;
+        u.u = (cl_ulong)( i + 1023 ) << 52;
+        x *= (long double) u.d;
+    }
+    return x;
+long double reference_rintl( long double x )
 #if defined(__PPC__)
-    // On PPC, long doubles are maintained as 2 doubles. Therefore, the combined
-    // mantissa can represent more than LDBL_MANT_DIG binary digits.
-    x = rintl(x);
+  // On PPC, long doubles are maintained as 2 doubles. Therefore, the combined
+  // mantissa can represent more than LDBL_MANT_DIG binary digits.
+  x = rintl(x);
-    static long double magic[2] = { 0.0L, 0.0L };
+    static long double magic[2] = { 0.0L, 0.0L};
-    if (0.0L == magic[0])
+    if( 0.0L == magic[0] )
         magic[0] = scalbnl(0.5L, LDBL_MANT_DIG);
         magic[1] = scalbnl(-0.5L, LDBL_MANT_DIG);
-    if (reference_fabsl(x) < magic[0] && x != 0.0L)
+    if( reference_fabsl(x) < magic[0] && x != 0.0L )
-        long double m = magic[x < 0];
+        long double m = magic[ x < 0 ];
         x += m;
         x -= m;
@@ -3337,7 +3078,7 @@
 static void __sqrt_ep(double *rhi, double *rlo, double xhi, double xlo)
     // approximate reciprocal sqrt
-    double thi = 1.0 / sqrt(xhi);
+    double thi = 1.0 / sqrt( xhi );
     double tlo = 0.0;
     // One newton iteration in double-double
@@ -3351,31 +3092,34 @@
     MulDD(rhi, rlo, yhi, ylo, xhi, xlo);
-long double reference_acoshl(long double x)
+long double reference_acoshl( long double x )
-    /*
-     * ====================================================
-     * This function derived from fdlibm
-     * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
-     *
-     * Developed at SunSoft, a Sun Microsystems, Inc. business.
-     * Permission to use, copy, modify, and distribute this
-     * software is freely granted, provided that this notice
-     * is preserved.
-     * ====================================================
-     *
-     */
-    if (isnan(x) || isinf(x)) return x + fabsl(x);
+ * ====================================================
+ * This function derived from fdlibm
+ * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ *
+ */
+    if( isnan(x) || isinf(x))
+        return x + fabsl(x);
-    if (x < 1.0L) return cl_make_nan();
+    if( x < 1.0L )
+        return cl_make_nan();
-    if (x == 1.0L) return 0.0L;
+    if( x == 1.0L )
+        return 0.0L;
-    if (x > HEX_LDBL(+, 1, 0, +, 60))
+    if( x > HEX_LDBL( +, 1, 0, +, 60 ) )
         return reference_logl(x) + 0.693147180559945309417232121458176568L;
-    if (x > 2.0L)
-        return reference_logl(2.0L * x - 1.0L / (x + sqrtl(x * x - 1.0L)));
+    if( x > 2.0L )
+        return reference_logl(2.0L * x - 1.0L / (x + sqrtl(x*x - 1.0L)));
     double hi, lo;
     MulD(&hi, &lo, x, x);
@@ -3384,290 +3128,286 @@
     AddDD(&hi, &lo, hi, lo, x, 0.0);
     double correction = lo / hi;
     __log2_ep(&hi, &lo, hi);
-    double log2Hi = HEX_DBL(+, 1, 62e42fefa39ef, -, 1);
-    double log2Lo = HEX_DBL(+, 1, abc9e3b39803f, -, 56);
+    double log2Hi = HEX_DBL( +, 1, 62e42fefa39ef, -, 1 );
+    double log2Lo = HEX_DBL( +, 1, abc9e3b39803f, -, 56 );
     MulDD(&hi, &lo, hi, lo, log2Hi, log2Lo);
     AddDD(&hi, &lo, hi, lo, correction, 0.0);
     return hi + lo;
-long double reference_asinhl(long double x)
+long double reference_asinhl( long double x )
     long double cutoff = 0.0L;
-    const long double ln2 = HEX_LDBL(+, b, 17217f7d1cf79ab, -, 4);
+    const long double ln2 = HEX_LDBL( +, b, 17217f7d1cf79ab, -, 4 );
-    if (cutoff == 0.0L) cutoff = reference_ldexpl(1.0L, -LDBL_MANT_DIG);
+    if( cutoff == 0.0L )
+        cutoff = reference_ldexpl(1.0L, -LDBL_MANT_DIG);
-    if (isnan(x) || isinf(x)) return x + x;
+    if( isnan(x) || isinf(x) )
+        return x + x;
     long double absx = reference_fabsl(x);
-    if (absx < cutoff) return x;
+    if( absx < cutoff )
+        return x;
     long double sign = reference_copysignl(1.0L, x);
-    if (absx <= 4.0 / 3.0)
-    {
-        return sign
-            * reference_log1pl(absx + x * x / (1.0 + sqrtl(1.0 + x * x)));
+    if( absx <= 4.0/3.0 ) {
+        return sign * reference_log1pl( absx + x*x / (1.0 + sqrtl(1.0 + x*x)));
-    else if (absx <= HEX_LDBL(+, 1, 0, +, 27))
-    {
-        return sign
-            * reference_logl(2.0L * absx + 1.0L / (sqrtl(x * x + 1.0) + absx));
+    else if( absx <= HEX_LDBL( +, 1, 0, +, 27 ) ) {
+        return sign * reference_logl( 2.0L * absx + 1.0L / (sqrtl( x * x + 1.0 ) + absx));
-    else
-    {
-        return sign * (reference_logl(absx) + ln2);
+    else {
+        return sign * ( reference_logl( absx ) + ln2 );
-long double reference_atanhl(long double x)
+long double reference_atanhl( long double x )
-    /*
-     * ====================================================
-     * This function is from fdlibm:
-     *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
-     *
-     * Developed at SunSoft, a Sun Microsystems, Inc. business.
-     * Permission to use, copy, modify, and distribute this
-     * software is freely granted, provided that this notice
-     * is preserved.
-     * ====================================================
-     */
-    if (isnan(x)) return x + x;
+ * ====================================================
+ * This function is from fdlibm:
+ *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+    if( isnan(x)  )
+        return x + x;
-    long double signed_half = reference_copysignl(0.5L, x);
+    long double signed_half = reference_copysignl( 0.5L, x );
     x = reference_fabsl(x);
-    if (x > 1.0L) return cl_make_nan();
+    if( x > 1.0L )
+        return cl_make_nan();
-    if (x < 0.5L)
-        return signed_half * reference_log1pl(2.0L * (x + x * x / (1 - x)));
+    if( x < 0.5L )
+        return signed_half * reference_log1pl( 2.0L * ( x + x*x / (1-x) ) );
-    return signed_half * reference_log1pl(2.0L * x / (1 - x));
+    return signed_half * reference_log1pl(2.0L * x / (1-x));
-long double reference_exp2l(long double z)
+long double reference_exp2l(  long double z)
     double_double x;
     int j;
     // Handle NaNs
-    if (isnan(z)) return z;
+    if( isnan(z) )
+        return z;
     // init x
     x.hi = z;
     x.lo = z - x.hi;
-    // Deal with overflow and underflow for exp2(x) stage next
-    if (x.hi >= 1025) return INFINITY;
+    //Deal with overflow and underflow for exp2(x) stage next
+    if( x.hi >= 1025 )
+        return INFINITY;
-    if (x.hi < -1075 - 24) return +0.0;
+    if( x.hi < -1075-24 )
+        return +0.0;
     // find nearest integer to x
-    int i = (int)rint(x.hi);
+    int i = (int) rint(x.hi);
     // x now holds fractional part.  The result would be then 2**i  * exp2( x )
     x.hi -= i;
-    // We could attempt to find a minimax polynomial for exp2(x) over the range
-    // x = [-0.5, 0.5]. However, this would converge very slowly near the
-    // extrema, where 0.5**n is not a lot different from 0.5**(n+1), thereby
-    // requiring something like a 20th order polynomial to get 53 + 24 bits of
-    // precision. Instead we further reduce the range to [-1/32, 1/32] by
-    // observing that
+    // We could attempt to find a minimax polynomial for exp2(x) over the range x = [-0.5, 0.5].
+    // However, this would converge very slowly near the extrema, where 0.5**n is not a lot different
+    // from 0.5**(n+1), thereby requiring something like a 20th order polynomial to get 53 + 24 bits
+    // of precision. Instead we further reduce the range to [-1/32, 1/32] by observing that
     //  2**(a+b) = 2**a * 2**b
-    // We can thus build a table of 2**a values for a = n/16, n = [-8, 8], and
-    // reduce the range of x to [-1/32, 1/32] by subtracting away the nearest
-    // value of n/16 from x.
-    const double_double corrections[17] = {
-        { HEX_DBL(+, 1, 6a09e667f3bcd, -, 1),
-          HEX_DBL(-, 1, bdd3413b26456, -, 55) },
-        { HEX_DBL(+, 1, 7a11473eb0187, -, 1),
-          HEX_DBL(-, 1, 41577ee04992f, -, 56) },
-        { HEX_DBL(+, 1, 8ace5422aa0db, -, 1),
-          HEX_DBL(+, 1, 6e9f156864b27, -, 55) },
-        { HEX_DBL(+, 1, 9c49182a3f09, -, 1),
-          HEX_DBL(+, 1, c7c46b071f2be, -, 57) },
-        { HEX_DBL(+, 1, ae89f995ad3ad, -, 1),
-          HEX_DBL(+, 1, 7a1cd345dcc81, -, 55) },
-        { HEX_DBL(+, 1, c199bdd85529c, -, 1),
-          HEX_DBL(+, 1, 11065895048dd, -, 56) },
-        { HEX_DBL(+, 1, d5818dcfba487, -, 1),
-          HEX_DBL(+, 1, 2ed02d75b3707, -, 56) },
-        { HEX_DBL(+, 1, ea4afa2a490da, -, 1),
-          HEX_DBL(-, 1, e9c23179c2893, -, 55) },
-        { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-        { HEX_DBL(+, 1, 0b5586cf9890f, +, 0),
-          HEX_DBL(+, 1, 8a62e4adc610b, -, 54) },
-        { HEX_DBL(+, 1, 172b83c7d517b, +, 0),
-          HEX_DBL(-, 1, 19041b9d78a76, -, 55) },
-        { HEX_DBL(+, 1, 2387a6e756238, +, 0),
-          HEX_DBL(+, 1, 9b07eb6c70573, -, 54) },
-        { HEX_DBL(+, 1, 306fe0a31b715, +, 0),
-          HEX_DBL(+, 1, 6f46ad23182e4, -, 55) },
-        { HEX_DBL(+, 1, 3dea64c123422, +, 0),
-          HEX_DBL(+, 1, ada0911f09ebc, -, 55) },
-        { HEX_DBL(+, 1, 4bfdad5362a27, +, 0),
-          HEX_DBL(+, 1, d4397afec42e2, -, 56) },
-        { HEX_DBL(+, 1, 5ab07dd485429, +, 0),
-          HEX_DBL(+, 1, 6324c054647ad, -, 54) },
-        { HEX_DBL(+, 1, 6a09e667f3bcd, +, 0),
-          HEX_DBL(-, 1, bdd3413b26456, -, 54) }
+    // We can thus build a table of 2**a values for a = n/16, n = [-8, 8], and reduce the range
+    // of x to [-1/32, 1/32] by subtracting away the nearest value of n/16 from x.
+    const double_double corrections[17] =
+    {
+        { HEX_DBL( +, 1, 6a09e667f3bcd, -, 1 ), HEX_DBL( -, 1, bdd3413b26456, -, 55 ) },
+        { HEX_DBL( +, 1, 7a11473eb0187, -, 1 ), HEX_DBL( -, 1, 41577ee04992f, -, 56 ) },
+        { HEX_DBL( +, 1, 8ace5422aa0db, -, 1 ), HEX_DBL( +, 1, 6e9f156864b27, -, 55 ) },
+        { HEX_DBL( +, 1, 9c49182a3f09,  -, 1 ), HEX_DBL( +, 1, c7c46b071f2be, -, 57 ) },
+        { HEX_DBL( +, 1, ae89f995ad3ad, -, 1 ), HEX_DBL( +, 1, 7a1cd345dcc81, -, 55 ) },
+        { HEX_DBL( +, 1, c199bdd85529c, -, 1 ), HEX_DBL( +, 1, 11065895048dd, -, 56 ) },
+        { HEX_DBL( +, 1, d5818dcfba487, -, 1 ), HEX_DBL( +, 1, 2ed02d75b3707, -, 56 ) },
+        { HEX_DBL( +, 1, ea4afa2a490da, -, 1 ), HEX_DBL( -, 1, e9c23179c2893, -, 55 ) },
+        { HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,             +,  0 ) },
+        { HEX_DBL( +, 1, 0b5586cf9890f, +, 0 ), HEX_DBL( +, 1, 8a62e4adc610b, -, 54 ) },
+        { HEX_DBL( +, 1, 172b83c7d517b, +, 0 ), HEX_DBL( -, 1, 19041b9d78a76, -, 55 ) },
+        { HEX_DBL( +, 1, 2387a6e756238, +, 0 ), HEX_DBL( +, 1, 9b07eb6c70573, -, 54 ) },
+        { HEX_DBL( +, 1, 306fe0a31b715, +, 0 ), HEX_DBL( +, 1, 6f46ad23182e4, -, 55 ) },
+        { HEX_DBL( +, 1, 3dea64c123422, +, 0 ), HEX_DBL( +, 1, ada0911f09ebc, -, 55 ) },
+        { HEX_DBL( +, 1, 4bfdad5362a27, +, 0 ), HEX_DBL( +, 1, d4397afec42e2, -, 56 ) },
+        { HEX_DBL( +, 1, 5ab07dd485429, +, 0 ), HEX_DBL( +, 1, 6324c054647ad, -, 54 ) },
+        { HEX_DBL( +, 1, 6a09e667f3bcd, +, 0 ), HEX_DBL( -, 1, bdd3413b26456, -, 54 ) }
-    int index = (int)rint(x.hi * 16.0);
-    x.hi -= (double)index * 0.0625;
+    int index = (int) rint( x.hi * 16.0 );
+    x.hi -= (double) index * 0.0625;
     // canonicalize x
     double temp = x.hi;
     x.hi += x.lo;
     x.lo -= x.hi - temp;
-    // Minimax polynomial for (exp2(x)-1)/x, over the range [-1/32, 1/32].  Max
-    // Error: 2 * 0x1.e112p-87
-    const double_double c[] = { { HEX_DBL(+, 1, 62e42fefa39ef, -, 1),
-                                  HEX_DBL(+, 1, abc9e3ac1d244, -, 56) },
-                                { HEX_DBL(+, 1, ebfbdff82c58f, -, 3),
-                                  HEX_DBL(-, 1, 5e4987a631846, -, 57) },
-                                { HEX_DBL(+, 1, c6b08d704a0c, -, 5),
-                                  HEX_DBL(-, 1, d323200a05713, -, 59) },
-                                { HEX_DBL(+, 1, 3b2ab6fba4e7a, -, 7),
-                                  HEX_DBL(+, 1, c5ee8f8b9f0c1, -, 63) },
-                                { HEX_DBL(+, 1, 5d87fe78a672a, -, 10),
-                                  HEX_DBL(+, 1, 884e5e5cc7ecc, -, 64) },
-                                { HEX_DBL(+, 1, 430912f7e8373, -, 13),
-                                  HEX_DBL(+, 1, 4f1b59514a326, -, 67) },
-                                { HEX_DBL(+, 1, ffcbfc5985e71, -, 17),
-                                  HEX_DBL(-, 1, db7d6a0953b78, -, 71) },
-                                { HEX_DBL(+, 1, 62c150eb16465, -, 20),
-                                  HEX_DBL(+, 1, e0767c2d7abf5, -, 80) },
-                                { HEX_DBL(+, 1, b52502b5e953, -, 24),
-                                  HEX_DBL(+, 1, 6797523f944bc, -, 78) } };
-    size_t count = sizeof(c) / sizeof(c[0]);
+    // Minimax polynomial for (exp2(x)-1)/x, over the range [-1/32, 1/32].  Max Error: 2 * 0x1.e112p-87
+    const double_double c[] = {
+        {HEX_DBL( +, 1, 62e42fefa39ef, -,  1 ), HEX_DBL( +, 1, abc9e3ac1d244, -, 56 )},
+        {HEX_DBL( +, 1, ebfbdff82c58f, -,  3 ), HEX_DBL( -, 1, 5e4987a631846, -, 57 )},
+        {HEX_DBL( +, 1, c6b08d704a0c,  -,  5 ), HEX_DBL( -, 1, d323200a05713, -, 59 )},
+        {HEX_DBL( +, 1, 3b2ab6fba4e7a, -,  7 ), HEX_DBL( +, 1, c5ee8f8b9f0c1, -, 63 )},
+        {HEX_DBL( +, 1, 5d87fe78a672a, -, 10 ), HEX_DBL( +, 1, 884e5e5cc7ecc, -, 64 )},
+        {HEX_DBL( +, 1, 430912f7e8373, -, 13 ), HEX_DBL( +, 1, 4f1b59514a326, -, 67 )},
+        {HEX_DBL( +, 1, ffcbfc5985e71, -, 17 ), HEX_DBL( -, 1, db7d6a0953b78, -, 71 )},
+        {HEX_DBL( +, 1, 62c150eb16465, -, 20 ), HEX_DBL( +, 1, e0767c2d7abf5, -, 80 )},
+        {HEX_DBL( +, 1, b52502b5e953,  -, 24 ), HEX_DBL( +, 1, 6797523f944bc, -, 78 )}
+    };
+    size_t count = sizeof( c ) / sizeof( c[0] );
     // Do polynomial
-    double_double r = c[count - 1];
-    for (j = (int)count - 2; j >= 0; j--) r = add_dd(c[j], mul_dd(r, x));
+    double_double r = c[count-1];
+    for( j = (int) count-2; j >= 0; j-- )
+        r = add_dd( c[j], mul_dd( r, x ) );
     // unwind approximation
-    r = mul_dd(r, x); // before: r =(exp2(x)-1)/x;   after: r = exp2(x) - 1
+    r = mul_dd( r, x );     // before: r =(exp2(x)-1)/x;   after: r = exp2(x) - 1
     // correct for [-0.5, 0.5] -> [-1/32, 1/32] reduction above
     //  exp2(x) = (r + 1) * correction = r * correction + correction
-    r = mul_dd(r, corrections[index + 8]);
-    r = add_dd(r, corrections[index + 8]);
+    r = mul_dd( r, corrections[index+8] );
+    r = add_dd( r, corrections[index+8] );
-    // Format result for output:
+// Format result for output:
     // Get mantissa
-    long double m = ((long double)r.hi + (long double)r.lo);
+    long double m = ((long double) r.hi + (long double) r.lo );
     // Handle a pesky overflow cases when long double = double
-    if (i > 512)
+    if( i > 512 )
-        m *= HEX_DBL(+, 1, 0, +, 512);
+        m *= HEX_DBL( +, 1, 0, +, 512 );
         i -= 512;
-    else if (i < -512)
+    else if( i < -512 )
-        m *= HEX_DBL(+, 1, 0, -, 512);
+        m *= HEX_DBL( +, 1, 0, -, 512 );
         i += 512;
-    return m * ldexpl(1.0L, i);
+    return m * ldexpl( 1.0L, i );
-long double reference_expm1l(long double x)
+long double reference_expm1l(  long double x)
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
-    // unimplemented
+#if defined( _MSC_VER ) && ! defined( __INTEL_COMPILER )
+    //unimplemented
     return x;
-    if (reference_isnanl(x)) return x;
+    union { double f; cl_ulong u;} u;
+    u.f = (double) x;
-    if (x > 710) return INFINITY;
+    if (reference_isnanl(x))
+        return x;
+    if ( x > 710 )
+        return INFINITY;
     long double y = expm1l(x);
     // Range of expm1l is -1.0L to +inf. Negative inf
     // on a few Linux platforms is clearly the wrong sign.
-    if (reference_isinfl(y)) y = INFINITY;
+    if (reference_isinfl(y))
+        y = INFINITY;
     return y;
-long double reference_fmaxl(long double x, long double y)
+long double reference_fmaxl( long double x, long double y )
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
     return x >= y ? x : y;
-long double reference_fminl(long double x, long double y)
+long double reference_fminl( long double x, long double y )
-    if (isnan(y)) return x;
+    if( isnan(y) )
+        return x;
     return x <= y ? x : y;
-long double reference_hypotl(long double x, long double y)
+long double reference_hypotl( long double x, long double y )
-    static const double tobig = HEX_DBL(+, 1, 0, +, 511);
-    static const double big = HEX_DBL(+, 1, 0, +, 513);
-    static const double rbig = HEX_DBL(+, 1, 0, -, 513);
-    static const double tosmall = HEX_DBL(+, 1, 0, -, 511);
-    static const double smalll = HEX_DBL(+, 1, 0, -, 607);
-    static const double rsmall = HEX_DBL(+, 1, 0, +, 607);
+  static const double tobig = HEX_DBL( +, 1, 0, +, 511 );
+  static const double big = HEX_DBL( +, 1, 0, +, 513 );
+  static const double rbig = HEX_DBL( +, 1, 0, -, 513 );
+  static const double tosmall = HEX_DBL( +, 1, 0, -, 511 );
+  static const double smalll = HEX_DBL( +, 1, 0, -, 607 );
+  static const double rsmall = HEX_DBL( +, 1, 0, +, 607 );
     long double max, min;
-    if (isinf(x) || isinf(y)) return INFINITY;
+    if( isinf(x) || isinf(y) )
+        return INFINITY;
-    if (isnan(x) || isnan(y)) return x + y;
+    if( isnan(x) || isnan(y) )
+        return x + y;
     x = reference_fabsl(x);
     y = reference_fabsl(y);
-    max = reference_fmaxl(x, y);
-    min = reference_fminl(x, y);
+    max = reference_fmaxl( x, y );
+    min = reference_fminl( x, y );
-    if (max > tobig)
+  if( max > tobig )
         max *= rbig;
         min *= rbig;
-        return big * sqrtl(max * max + min * min);
+        return big * sqrtl( max * max + min * min );
-    if (max < tosmall)
+  if( max < tosmall )
         max *= rsmall;
         min *= rsmall;
-        return smalll * sqrtl(max * max + min * min);
+      return smalll * sqrtl( max * max + min * min );
-    return sqrtl(x * x + y * y);
+    return sqrtl( x * x + y * y );
-long double reference_log2l(long double x)
+//long double reference_log2l( long double x )
+//    return log( x ) * 1.44269504088896340735992468100189214L;
+long double reference_log2l( long double x )
-    if (isnan(x) || x < 0.0 || x == -INFINITY) return NAN;
+    if( isnan(x) || x < 0.0 || x == -INFINITY)
+        return NAN;
-    if (x == 0.0f) return -INFINITY;
+    if( x == 0.0f)
+        return -INFINITY;
-    if (x == INFINITY) return INFINITY;
+    if( x == INFINITY )
+        return INFINITY;
     double hi, lo;
-    __log2_ep(&hi, &lo, x);
+    __log2_ep( &hi, &lo, x);
-    return (long double)hi + (long double)lo;
+    return (long double) hi + (long double) lo;
-long double reference_log1pl(long double x)
+long double reference_log1pl(  long double x)
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
-    // unimplemented
+#if defined( _MSC_VER ) && ! defined( __INTEL_COMPILER )
+    //unimplemented
     return x;
 #elif defined(__PPC__)
     // log1pl on PPC inadvertantly returns NaN for very large values. Work
@@ -3678,24 +3418,23 @@
-long double reference_logbl(long double x)
+long double reference_logbl( long double x )
     // Since we are just using this to verify double precision, we can
     // use the double precision copysign here
-    union {
-        double f;
-        cl_ulong u;
-    } u;
-    u.f = (double)x;
+    union { double f; cl_ulong u;} u;
+    u.f = (double) x;
     cl_int exponent = (cl_uint)(u.u >> 52) & 0x7ff;
-    if (exponent == 0x7ff) return x * x;
+    if( exponent == 0x7ff )
+        return x * x;
-    if (exponent == 0)
-    { // deal with denormals
-        u.f = x * HEX_DBL(+, 1, 0, +, 64);
+    if( exponent == 0 )
+    {   // deal with denormals
+        u.f =  x * HEX_DBL( +, 1, 0, +, 64 );
         exponent = (cl_int)(u.u >> 52) & 0x7ff;
-        if (exponent == 0) return -INFINITY;
+        if( exponent == 0 )
+            return -INFINITY;
         return exponent - (1023 + 64);
@@ -3703,82 +3442,84 @@
     return exponent - 1023;
-long double reference_maxmagl(long double x, long double y)
+long double reference_maxmagl( long double x, long double y )
     long double fabsx = fabsl(x);
     long double fabsy = fabsl(y);
-    if (fabsx < fabsy) return y;
+    if( fabsx < fabsy )
+        return y;
-    if (fabsy < fabsx) return x;
+    if( fabsy < fabsx )
+        return x;
     return reference_fmaxl(x, y);
-long double reference_minmagl(long double x, long double y)
+long double reference_minmagl( long double x, long double y )
     long double fabsx = fabsl(x);
     long double fabsy = fabsl(y);
-    if (fabsx > fabsy) return y;
+    if( fabsx > fabsy )
+        return y;
-    if (fabsy > fabsx) return x;
+    if( fabsy > fabsx )
+        return x;
     return reference_fminl(x, y);
-long double reference_nanl(cl_ulong x)
+long double reference_nanl( cl_ulong x )
-    union {
-        cl_ulong u;
-        cl_double f;
-    } u;
+    union{ cl_ulong u; cl_double f; }u;
     u.u = x | 0x7ff8000000000000ULL;
-    return (long double)u.f;
+    return (long double) u.f;
-long double reference_reciprocall(long double x) { return 1.0L / x; }
+long double reference_reciprocall( long double x )
+    return 1.0L / x;
-long double reference_remainderl(long double x, long double y)
+long double reference_remainderl( long double x, long double y );
+long double reference_remainderl( long double x, long double y )
     int i;
-    return reference_remquol(x, y, &i);
+    return reference_remquol( x, y, &i );
-long double reference_lgammal(long double x)
+long double reference_lgammal( long double x);
+long double reference_lgammal( long double x)
     // lgamma is currently not tested
-    return reference_lgamma(x);
+    return reference_lgamma( x );
-static uint32_t two_over_pi[] = {
-    0x0,        0x28be60db, 0x24e44152, 0x27f09d5f, 0x11f534dd, 0x3036d8a5,
-    0x1993c439, 0x107f945,  0x23abdebb, 0x31586dc9, 0x6e3a424,  0x374b8019,
-    0x92eea09,  0x3464873f, 0x21deb1cb, 0x4a69cfb,  0x288235f5, 0xbaed121,
-    0xe99c702,  0x1ad17df9, 0x13991d6,  0xe60d4ce,  0x1f49c845, 0x3e2ef7e4,
-    0x283b1ff8, 0x25fff781, 0x1980fef2, 0x3c462d68, 0xa6d1f6d,  0xd9fb3c9,
-    0x3cb09b74, 0x3d18fd9a, 0x1e5fea2d, 0x1d49eeb1, 0x3ebe5f17, 0x2cf41ce7,
-    0x378a5292, 0x3a9afed7, 0x3b11f8d5, 0x3421580c, 0x3046fc7b, 0x1aeafc33,
-    0x3bc209af, 0x10d876a7, 0x2391615e, 0x3986c219, 0x199855f1, 0x1281a102,
-    0xdffd880,  0x135cc9cc, 0x10606155
+static uint32_t two_over_pi[] = { 0x0, 0x28be60db, 0x24e44152, 0x27f09d5f, 0x11f534dd, 0x3036d8a5, 0x1993c439, 0x107f945, 0x23abdebb, 0x31586dc9,
+0x6e3a424, 0x374b8019, 0x92eea09, 0x3464873f, 0x21deb1cb, 0x4a69cfb, 0x288235f5, 0xbaed121, 0xe99c702, 0x1ad17df9,
+0x13991d6, 0xe60d4ce, 0x1f49c845, 0x3e2ef7e4, 0x283b1ff8, 0x25fff781, 0x1980fef2, 0x3c462d68, 0xa6d1f6d, 0xd9fb3c9,
+0x3cb09b74, 0x3d18fd9a, 0x1e5fea2d, 0x1d49eeb1, 0x3ebe5f17, 0x2cf41ce7, 0x378a5292, 0x3a9afed7, 0x3b11f8d5, 0x3421580c,
+0x3046fc7b, 0x1aeafc33, 0x3bc209af, 0x10d876a7, 0x2391615e, 0x3986c219, 0x199855f1, 0x1281a102, 0xdffd880, 0x135cc9cc,
-static uint32_t pi_over_two[] = { 0x1,        0x2487ed51, 0x42d1846,
-                                  0x26263314, 0x1701b839, 0x28948127 };
+static uint32_t pi_over_two[] = { 0x1, 0x2487ed51, 0x42d1846, 0x26263314, 0x1701b839, 0x28948127 };
-typedef union {
-    uint64_t u;
-    double d;
-} d_ui64_t;
+typedef union
+    {
+        uint64_t u;
+        double   d;
+    }d_ui64_t;
 // radix or base of representation
 #define RADIX (30)
 #define DIGITS 6
-d_ui64_t two_pow_pradix = { (uint64_t)(1023 + RADIX) << 52 };
-d_ui64_t two_pow_mradix = { (uint64_t)(1023 - RADIX) << 52 };
-d_ui64_t two_pow_two_mradix = { (uint64_t)(1023 - 2 * RADIX) << 52 };
+d_ui64_t two_pow_pradix = { (uint64_t) (1023 + RADIX) << 52 };
+d_ui64_t two_pow_mradix = { (uint64_t) (1023 - RADIX) << 52 };
+d_ui64_t two_pow_two_mradix = { (uint64_t) (1023-2*RADIX) << 52 };
 #define tp_pradix two_pow_pradix.d
 #define tp_mradix two_pow_mradix.d
@@ -3787,28 +3528,27 @@
 // floating point number.
 // x = sign * [ sum_{i = 0 to 2} ( X[i] * 2^(index - i)*RADIX ) ]
 typedef struct
-    uint32_t X[3]; // three 32 bit integers are sufficient to represnt double in
-                   // base_30
-    int index; // exponent bias
-    int sign; // sign of double
-} eprep_t;
+    {
+        uint32_t X[3];        // three 32 bit integers are sufficient to represnt double in base_30
+        int index;            // exponent bias
+        int sign;            // sign of double
+    }eprep_t;
+static eprep_t double_to_eprep(double x);
 static eprep_t double_to_eprep(double x)
     eprep_t result;
-    result.sign = (signbit(x) == 0) ? 1 : -1;
-    x = fabs(x);
+    result.sign = (signbit( x ) == 0) ? 1 : -1;
+    x = fabs( x );
     int index = 0;
-    while (x > tp_pradix)
-    {
+    while( x > tp_pradix ) {
         x *= tp_mradix;
-    while (x < 1)
-    {
+    while( x < 1 ) {
         x *= tp_pradix;
@@ -3816,125 +3556,190 @@
     result.index = index;
     int i = 0;
     result.X[0] = result.X[1] = result.X[2] = 0;
-    while (x != 0.0)
-    {
-        result.X[i] = (uint32_t)x;
-        x = (x - (double)result.X[i]) * tp_pradix;
+    while( x != 0.0 ) {
+        result.X[i] = (uint32_t) x;
+        x = (x - (double) result.X[i]) * tp_pradix;
     return result;
-static double eprep_to_double(eprep_t epx)
+ double eprep_to_double( uint32_t *R, int digits, int index, int sgn )
+ {
+ d_ui64_t nb, rndcorr;
+ uint64_t lowpart, roundbits, t1;
+ int expo, expofinal, shift;
+ double res;
+ nb.d = (double) R[0];
+ t1   = R[1];
+ lowpart  = (t1 << RADIX) + R[2];
+ expo = ((nb.u & 0x7ff0000000000000ULL) >> 52) - 1023;
+ expofinal = expo + RADIX*index;
+ if (expofinal >  1023) {
+ d_ui64_t inf = { 0x7ff0000000000000ULL };
+ res = inf.d;
+ }
+ else if (expofinal >= -1022){
+ shift = expo + 2*RADIX - 53;
+ roundbits = lowpart << (64-shift);
+ lowpart = lowpart >> shift;
+ if (lowpart & 0x0000000000000001ULL) {
+ if(roundbits == 0) {
+ int i;
+ for (i=3; i < digits; i++)
+ roundbits = roundbits | R[i];
+ }
+ if(roundbits == 0) {
+ if (lowpart & 0x0000000000000002ULL)
+ rndcorr.u = (uint64_t) (expo - 52 + 1023) << 52;
+ else
+ rndcorr.d = 0.0;
+ }
+ else
+ rndcorr.u = (uint64_t) (expo - 52 + 1023) << 52;
+ }
+ else{
+ rndcorr.d = 0.0;
+ }
+ lowpart = lowpart >> 1;
+ nb.u = nb.u | lowpart;
+ res  = nb.d + rndcorr.d;
+ if(index*RADIX + 1023 > 0) {
+ nb.u = 0;
+ nb.u = (uint64_t) (index*RADIX + 1023) << 52;
+ res *= nb.d;
+ }
+ else {
+ nb.u = 0;
+ nb.u = (uint64_t) (index*RADIX + 1023 + 2*RADIX) << 52;
+ res *= two_pow_two_mradix.d;
+ res *= nb.d;
+ }
+ }
+ else {
+ if (expofinal < -1022 - 53 ) {
+ res = 0.0;
+ }
+ else {
+ lowpart = lowpart >> (expo + (2*RADIX) - 52);
+ nb.u = nb.u | lowpart;
+ nb.u = (nb.u & 0x000FFFFFFFFFFFFFULL) | 0x0010000000000000ULL;
+ nb.u = nb.u >> (-1023 - expofinal);
+ if(nb.u & 0x0000000000000001ULL)
+ rndcorr.u = 1;
+ else
+ rndcorr.d = 0.0;
+ res  = 0.5*(nb.d + rndcorr.d);
+ }
+ }
+ return sgn*res;
+ }
+ */
+static double eprep_to_double( eprep_t epx );
+static double eprep_to_double( eprep_t epx )
     double res = 0.0;
-    res += ldexp((double)epx.X[0], (epx.index - 0) * RADIX);
-    res += ldexp((double)epx.X[1], (epx.index - 1) * RADIX);
-    res += ldexp((double)epx.X[2], (epx.index - 2) * RADIX);
+    res += ldexp((double) epx.X[0], (epx.index - 0)*RADIX);
+    res += ldexp((double) epx.X[1], (epx.index - 1)*RADIX);
+    res += ldexp((double) epx.X[2], (epx.index - 2)*RADIX);
     return copysign(res, epx.sign);
-static int payne_hanek(double *y, int *exception)
+static int payne_hanek( double *y, int *exception );
+static int payne_hanek( double *y, int *exception )
     double x = *y;
     // exception cases .. no reduction required
-    if (isnan(x) || isinf(x) || (fabs(x) <= M_PI_4))
-    {
+    if( isnan( x ) || isinf( x ) || (fabs( x ) <= M_PI_4) ) {
         *exception = 1;
         return 0;
     *exception = 0;
-    // After computation result[0] contains integer part while
-    // result[1]....result[DIGITS-1] contain fractional part. So we are doing
-    // computation with (DIGITS-1)*RADIX precision. Default DIGITS=6 and
-    // RADIX=30 so default precision is 150 bits. Kahan-McDonald algorithm shows
-    // that a double precision x, closest to pi/2 is 6381956970095103 x 2^797
-    // which can cause 61 digits of cancellation in computation of f = x*2/pi -
-    // floor(x*2/pi) ... thus we need at least 114 bits (61 leading zeros + 53
-    // bits of mentissa of f) of precision to accurately compute f in double
-    // precision. Since we are using 150 bits (still an overkill), we should be
-    // safe. Extra bits can act as guard bits for correct rounding.
-    uint64_t result[DIGITS + 2];
+    // After computation result[0] contains the integer part while result[1]....result[DIGITS-1]
+    // contain the fractional part. So we are doing computation with (DIGITS-1)*RADIX precision.
+    // Default DIGITS=6 and RADIX=30 so default precision is 150 bits. Kahan-McDonald algorithm
+    // shows that the double precision x closest to an integer multiple of pi/2 is 6381956970095103 x 2^797,
+    // which can cause 61 digits of cancellation in computation of f = x*2/pi - floor(x*2/pi) ... thus we need
+    // at least 114 bits (61 leading zeros + 53 bits of mantissa of f) of precision to accurately compute
+    // f in double precision. Since we are using 150 bits (still an overkill), we should be safe. Extra
+    // bits can act as guard bits for correct rounding.
+    uint64_t result[DIGITS+2];
     // compute extended precision representation of x
-    eprep_t epx = double_to_eprep(x);
+    eprep_t epx = double_to_eprep( x );
     int index = epx.index;
     int i, j;
-    // extended precision multiplication of 2/pi*x .... we will loose at max two
-    // RADIX=30 bit digits in the worst case
-    for (i = 0; i < (DIGITS + 2); i++)
-    {
+    // extended precision multiplication of 2/pi*x .... we will lose at most two RADIX=30 bit digits in
+    // the worst case
+    for(i = 0; i < (DIGITS+2); i++) {
         result[i] = 0;
-        result[i] += ((index + i - 0) >= 0)
-            ? ((uint64_t)two_over_pi[index + i - 0] * (uint64_t)epx.X[0])
-            : 0;
-        result[i] += ((index + i - 1) >= 0)
-            ? ((uint64_t)two_over_pi[index + i - 1] * (uint64_t)epx.X[1])
-            : 0;
-        result[i] += ((index + i - 2) >= 0)
-            ? ((uint64_t)two_over_pi[index + i - 2] * (uint64_t)epx.X[2])
-            : 0;
+        result[i] += ((index + i - 0) >= 0) ? ((uint64_t) two_over_pi[index + i - 0] * (uint64_t) epx.X[0]) : 0;
+        result[i] += ((index + i - 1) >= 0) ? ((uint64_t) two_over_pi[index + i - 1] * (uint64_t) epx.X[1]) : 0;
+        result[i] += ((index + i - 2) >= 0) ? ((uint64_t) two_over_pi[index + i - 2] * (uint64_t) epx.X[2]) : 0;
     // Carry propagation.
     uint64_t tmp;
-    for (i = DIGITS + 2 - 1; i > 0; i--)
-    {
+    for(i = DIGITS+2-1; i > 0; i--) {
         tmp = result[i] >> RADIX;
         result[i - 1] += tmp;
         result[i] -= (tmp << RADIX);
-    // we dont ned to normalize the integer part since only last two bits of
-    // this will be used subsequently algorithm which remain unaltered by this
-    // normalization. tmp = result[0] >> RADIX; result[0] -= (tmp << RADIX);
-    unsigned int N = (unsigned int)result[0];
+    // We don't need to normalize the integer part since only its last two bits are used
+    // subsequently by the algorithm, and they remain unaltered by this normalization.
+    // tmp = result[0] >> RADIX;
+    // result[0] -= (tmp << RADIX);
+    unsigned int N = (unsigned int) result[0];
-    // if the result is > pi/4, bring it to (-pi/4, pi/4] range. Note that
-    // testing if the final x_star = pi/2*(x*2/pi - k) > pi/4 is equivalent to
-    // testing, at this stage, if r[1] (the first fractional digit) is greater
-    // than (2^RADIX)/2 and substracting pi/4 from x_star to bring it to
-    // mentioned range is equivalent to substracting fractional part at this
-    // stage from one and changing the sign.
+    // If the result is > pi/4, bring it to the (-pi/4, pi/4] range. Note that testing if the final
+    // x_star = pi/2*(x*2/pi - k) > pi/4 is equivalent to testing, at this stage, if r[1] (the first fractional
+    // digit) is greater than (2^RADIX)/2, and subtracting pi/4 from x_star to bring it to the mentioned
+    // range is equivalent to subtracting the fractional part at this stage from one and changing the sign.
     int sign = 1;
-    if (result[1] > (uint64_t)(1 << (RADIX - 1)))
-    {
-        for (i = 1; i < (DIGITS + 2); i++)
+    if(result[1] > (uint64_t)(1 << (RADIX - 1))) {
+        for(i = 1; i < (DIGITS + 2); i++)
             result[i] = (~((unsigned int)result[i]) & 0x3fffffff);
         N += 1;
         sign = -1;
-    // Again as per Kahan-McDonald algorithim there may be 61 leading zeros in
-    // the worst case (when x is multiple of 2/pi very close to an integer) so
-    // we need to get rid of these zeros and adjust the index of final result.
-    // So in the worst case, precision of comupted result is 90 bits (150 bits
-    // original bits - 60 lost in cancellation).
+    // Again, as per the Kahan-McDonald algorithm there may be 61 leading zeros in the worst case
+    // (when x*2/pi is very close to an integer) so we need to get rid of these zeros
+    // and adjust the index of the final result. So in the worst case, precision of the computed result is
+    // 90 bits (150 original bits - 60 lost in cancellation).
     int ind = 1;
-    for (i = 1; i < (DIGITS + 2); i++)
-    {
-        if (result[i] != 0)
+    for(i = 1; i < (DIGITS+2); i++) {
+        if(result[i] != 0)
-    uint64_t r[DIGITS - 1];
-    for (i = 0; i < (DIGITS - 1); i++)
-    {
+    uint64_t r[DIGITS-1];
+    for(i = 0; i < (DIGITS-1); i++) {
         r[i] = 0;
-        for (j = 0; j <= i; j++)
-        {
-            r[i] += (result[ind + i - j] * (uint64_t)pi_over_two[j]);
+        for(j = 0; j <= i; j++) {
+            r[i] += (result[ind+i-j] * (uint64_t) pi_over_two[j]);
-    for (i = (DIGITS - 2); i > 0; i--)
-    {
+    for(i = (DIGITS-2); i > 0; i--) {
         tmp = r[i] >> RADIX;
         r[i - 1] += tmp;
         r[i] -= (tmp << RADIX);
@@ -3943,127 +3748,147 @@
     r[0] -= (tmp << RADIX);
     eprep_t epr;
-    epr.sign = epx.sign * sign;
-    if (tmp != 0)
-    {
+    epr.sign = epx.sign*sign;
+    if(tmp != 0) {
         epr.index = -ind + 1;
-        epr.X[0] = (uint32_t)tmp;
-        epr.X[1] = (uint32_t)r[0];
-        epr.X[2] = (uint32_t)r[1];
+        epr.X[0] = (uint32_t) tmp;
+        epr.X[1] = (uint32_t) r[0];
+        epr.X[2] = (uint32_t) r[1];
-    else
-    {
+    else {
         epr.index = -ind;
-        epr.X[0] = (uint32_t)r[0];
-        epr.X[1] = (uint32_t)r[1];
-        epr.X[2] = (uint32_t)r[2];
+        epr.X[0] = (uint32_t) r[0];
+        epr.X[1] = (uint32_t) r[1];
+        epr.X[2] = (uint32_t) r[2];
-    *y = eprep_to_double(epr);
-    return epx.sign * N;
+    *y = eprep_to_double( epr );
+    return epx.sign*N;
 double reference_relaxed_cos(double x)
-    if (isnan(x)) return NAN;
-    return (float)cos((float)x);
+  if(isnan(x))
+    return NAN;
+  return (float)cos((float)x);
 double reference_cos(double x)
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception) return cos(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception )
+        return cos( x );
     unsigned int c = N & 3;
-    switch (c)
-    {
-        case 0: return cos(x);
-        case 1: return -sin(x);
-        case 2: return -cos(x);
-        case 3: return sin(x);
+    switch ( c ) {
+        case 0:
+            return  cos( x );
+        case 1:
+            return -sin( x );
+        case 2:
+            return -cos( x );
+        case 3:
+            return  sin( x );
     return 0.0;
-double reference_relaxed_sin(double x) { return (float)sin((float)x); }
+double reference_relaxed_sin(double x){
+  return (float)sin((float)x);
 double reference_sin(double x)
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception) return sin(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception )
+        return sin( x );
     int c = N & 3;
-    switch (c)
-    {
-        case 0: return sin(x);
-        case 1: return cos(x);
-        case 2: return -sin(x);
-        case 3: return -cos(x);
+    switch ( c ) {
+        case 0:
+            return  sin( x );
+        case 1:
+            return  cos( x );
+        case 2:
+            return -sin( x );
+        case 3:
+            return -cos( x );
     return 0.0;
-double reference_relaxed_sincos(double x, double *y)
-    *y = reference_relaxed_cos(x);
-    return reference_relaxed_sin(x);
+double reference_relaxed_sincos(double x, double * y){
+  *y = reference_relaxed_cos(x);
+  return reference_relaxed_sin(x);
 double reference_sincos(double x, double *y)
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception)
-    {
-        *y = cos(x);
-        return sin(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception ) {
+        *y = cos( x );
+        return sin( x );
     int c = N & 3;
-    switch (c)
-    {
-        case 0: *y = cos(x); return sin(x);
-        case 1: *y = -sin(x); return cos(x);
-        case 2: *y = -cos(x); return -sin(x);
-        case 3: *y = sin(x); return -cos(x);
+    switch ( c ) {
+        case 0:
+            *y = cos( x );
+            return  sin( x );
+        case 1:
+            *y = -sin( x );
+            return  cos( x );
+        case 2:
+            *y = -cos( x );
+            return -sin( x );
+        case 3:
+            *y = sin( x );
+            return -cos( x );
     return 0.0;
-double reference_relaxed_tan(double x)
-    return ((float)reference_relaxed_sin((float)x))
-        / ((float)reference_relaxed_cos((float)x));
+double reference_relaxed_tan(double x){
+  return ((float) reference_relaxed_sin((float)x))/((float) reference_relaxed_cos((float)x));
 double reference_tan(double x)
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception) return tan(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception )
+        return tan( x );
     int c = N & 3;
-    switch (c)
-    {
-        case 0: return tan(x);
-        case 1: return -1.0 / tan(x);
-        case 2: return tan(x);
-        case 3: return -1.0 / tan(x);
+    switch ( c ) {
+        case 0:
+            return  tan( x );
+        case 1:
+            return -1.0 / tan( x );
+        case 2:
+            return tan( x );
+        case 3:
+            return -1.0 / tan( x );
     return 0.0;
 long double reference_cosl(long double xx)
-    double x = (double)xx;
+    double x = (double) xx;
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception) return cosl(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception )
+        return cosl( x );
     unsigned int c = N & 3;
-    switch (c)
-    {
-        case 0: return cosl(x);
-        case 1: return -sinl(x);
-        case 2: return -cosl(x);
-        case 3: return sinl(x);
+    switch ( c ) {
+        case 0:
+            return  cosl( x );
+        case 1:
+            return -sinl( x );
+        case 2:
+            return -cosl( x );
+        case 3:
+            return  sinl( x );
     return 0.0;
@@ -4072,20 +3897,25 @@
     // we use system tanl after reduction which
     // can flush denorm input to zero so
-    // take care of it here.
-    if (reference_fabsl(xx) < HEX_DBL(+, 1, 0, -, 1022)) return xx;
+    //take care of it here.
+    if(reference_fabsl(xx) < HEX_DBL( +, 1, 0, -, 1022 ))
+        return xx;
-    double x = (double)xx;
+    double x = (double) xx;
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception) return sinl(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception )
+        return sinl( x );
     int c = N & 3;
-    switch (c)
-    {
-        case 0: return sinl(x);
-        case 1: return cosl(x);
-        case 2: return -sinl(x);
-        case 3: return -cosl(x);
+    switch ( c ) {
+        case 0:
+            return  sinl( x );
+        case 1:
+            return  cosl( x );
+        case 2:
+            return -sinl( x );
+        case 3:
+            return -cosl( x );
     return 0.0;
@@ -4094,28 +3924,34 @@
     // we use system tanl after reduction which
     // can flush denorm input to zero so
-    // take care of it here.
-    if (reference_fabsl(xx) < HEX_DBL(+, 1, 0, -, 1022))
+    //take care of it here.
+    if(reference_fabsl(xx) < HEX_DBL( +, 1, 0, -, 1022 ))
         *y = cosl(xx);
         return xx;
-    double x = (double)xx;
+    double x = (double) xx;
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception)
-    {
-        *y = cosl(x);
-        return sinl(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception ) {
+        *y = cosl( x );
+        return sinl( x );
     int c = N & 3;
-    switch (c)
-    {
-        case 0: *y = cosl(x); return sinl(x);
-        case 1: *y = -sinl(x); return cosl(x);
-        case 2: *y = -cosl(x); return -sinl(x);
-        case 3: *y = sinl(x); return -cosl(x);
+    switch ( c ) {
+        case 0:
+            *y = cosl( x );
+            return  sinl( x );
+        case 1:
+            *y = -sinl( x );
+            return  cosl( x );
+        case 2:
+            *y = -cosl( x );
+            return -sinl( x );
+        case 3:
+            *y = sinl( x );
+            return -cosl( x );
     return 0.0;
@@ -4124,337 +3960,205 @@
     // we use system tanl after reduction which
     // can flush denorm input to zero so
-    // take care of it here.
-    if (reference_fabsl(xx) < HEX_DBL(+, 1, 0, -, 1022)) return xx;
+    //take care of it here.
+    if(reference_fabsl(xx) < HEX_DBL( +, 1, 0, -, 1022 ))
+        return xx;
-    double x = (double)xx;
+    double x = (double) xx;
     int exception;
-    int N = payne_hanek(&x, &exception);
-    if (exception) return tanl(x);
+    int N = payne_hanek( &x, &exception );
+    if( exception )
+        return tanl( x );
     int c = N & 3;
-    switch (c)
-    {
-        case 0: return tanl(x);
-        case 1: return -1.0 / tanl(x);
-        case 2: return tanl(x);
-        case 3: return -1.0 / tanl(x);
+    switch ( c ) {
+        case 0:
+            return  tanl( x );
+        case 1:
+            return -1.0 / tanl( x );
+        case 2:
+            return tanl( x );
+        case 3:
+            return -1.0 / tanl( x );
     return 0.0;
 static double __loglTable1[64][3] = {
-    { HEX_DBL(+, 1, 5390948f40fea, +, 0), HEX_DBL(-, 1, a152f142a, -, 2),
-      HEX_DBL(+, 1, f93e27b43bd2c, -, 40) },
-    { HEX_DBL(+, 1, 5015015015015, +, 0), HEX_DBL(-, 1, 921800925, -, 2),
-      HEX_DBL(+, 1, 162432a1b8df7, -, 41) },
-    { HEX_DBL(+, 1, 4cab88725af6e, +, 0), HEX_DBL(-, 1, 8304d90c18, -, 2),
-      HEX_DBL(+, 1, 80bb749056fe7, -, 40) },
-    { HEX_DBL(+, 1, 49539e3b2d066, +, 0), HEX_DBL(-, 1, 7418acebc, -, 2),
-      HEX_DBL(+, 1, ceac7f0607711, -, 43) },
-    { HEX_DBL(+, 1, 460cbc7f5cf9a, +, 0), HEX_DBL(-, 1, 6552b49988, -, 2),
-      HEX_DBL(+, 1, d8913d0e89fa, -, 42) },
-    { HEX_DBL(+, 1, 42d6625d51f86, +, 0), HEX_DBL(-, 1, 56b22e6b58, -, 2),
-      HEX_DBL(+, 1, c7eaf515033a1, -, 44) },
-    { HEX_DBL(+, 1, 3fb013fb013fb, +, 0), HEX_DBL(-, 1, 48365e696, -, 2),
-      HEX_DBL(+, 1, 434adcde7edc7, -, 41) },
-    { HEX_DBL(+, 1, 3c995a47babe7, +, 0), HEX_DBL(-, 1, 39de8e156, -, 2),
-      HEX_DBL(+, 1, 8246f8e527754, -, 40) },
-    { HEX_DBL(+, 1, 3991c2c187f63, +, 0), HEX_DBL(-, 1, 2baa0c34c, -, 2),
-      HEX_DBL(+, 1, e1513c28e180d, -, 42) },
-    { HEX_DBL(+, 1, 3698df3de0747, +, 0), HEX_DBL(-, 1, 1d982c9d58, -, 2),
-      HEX_DBL(+, 1, 63ea3fed4b8a2, -, 40) },
-    { HEX_DBL(+, 1, 33ae45b57bcb1, +, 0), HEX_DBL(-, 1, 0fa848045, -, 2),
-      HEX_DBL(+, 1, 32ccbacf1779b, -, 40) },
-    { HEX_DBL(+, 1, 30d190130d19, +, 0), HEX_DBL(-, 1, 01d9bbcfa8, -, 2),
-      HEX_DBL(+, 1, e2bfeb2b884aa, -, 42) },
-    { HEX_DBL(+, 1, 2e025c04b8097, +, 0), HEX_DBL(-, 1, e857d3d37, -, 3),
-      HEX_DBL(+, 1, d9309b4d2ea85, -, 40) },
-    { HEX_DBL(+, 1, 2b404ad012b4, +, 0), HEX_DBL(-, 1, cd3c712d4, -, 3),
-      HEX_DBL(+, 1, ddf360962d7ab, -, 40) },
-    { HEX_DBL(+, 1, 288b01288b012, +, 0), HEX_DBL(-, 1, b2602497e, -, 3),
-      HEX_DBL(+, 1, 597f8a121640f, -, 40) },
-    { HEX_DBL(+, 1, 25e22708092f1, +, 0), HEX_DBL(-, 1, 97c1cb13d, -, 3),
-      HEX_DBL(+, 1, 02807d15580dc, -, 40) },
-    { HEX_DBL(+, 1, 23456789abcdf, +, 0), HEX_DBL(-, 1, 7d60496d, -, 3),
-      HEX_DBL(+, 1, 12ce913d7a827, -, 41) },
-    { HEX_DBL(+, 1, 20b470c67c0d8, +, 0), HEX_DBL(-, 1, 633a8bf44, -, 3),
-      HEX_DBL(+, 1, 0648bca9c96bd, -, 40) },
-    { HEX_DBL(+, 1, 1e2ef3b3fb874, +, 0), HEX_DBL(-, 1, 494f863b9, -, 3),
-      HEX_DBL(+, 1, 066fceb89b0eb, -, 42) },
-    { HEX_DBL(+, 1, 1bb4a4046ed29, +, 0), HEX_DBL(-, 1, 2f9e32d5c, -, 3),
-      HEX_DBL(+, 1, 17b8b6c4f846b, -, 46) },
-    { HEX_DBL(+, 1, 19453808ca29c, +, 0), HEX_DBL(-, 1, 162593187, -, 3),
-      HEX_DBL(+, 1, 2c83506452154, -, 42) },
-    { HEX_DBL(+, 1, 16e0689427378, +, 0), HEX_DBL(-, 1, f9c95dc1e, -, 4),
-      HEX_DBL(+, 1, dd5d2183150f3, -, 41) },
-    { HEX_DBL(+, 1, 1485f0e0acd3b, +, 0), HEX_DBL(-, 1, c7b528b72, -, 4),
-      HEX_DBL(+, 1, 0e43c4f4e619d, -, 40) },
-    { HEX_DBL(+, 1, 12358e75d3033, +, 0), HEX_DBL(-, 1, 960caf9ac, -, 4),
-      HEX_DBL(+, 1, 20fbfd5902a1e, -, 42) },
-    { HEX_DBL(+, 1, 0fef010fef01, +, 0), HEX_DBL(-, 1, 64ce26c08, -, 4),
-      HEX_DBL(+, 1, 8ebeefb4ac467, -, 40) },
-    { HEX_DBL(+, 1, 0db20a88f4695, +, 0), HEX_DBL(-, 1, 33f7cde16, -, 4),
-      HEX_DBL(+, 1, 30b3312da7a7d, -, 40) },
-    { HEX_DBL(+, 1, 0b7e6ec259dc7, +, 0), HEX_DBL(-, 1, 0387efbcc, -, 4),
-      HEX_DBL(+, 1, 796f1632949c3, -, 40) },
-    { HEX_DBL(+, 1, 0953f39010953, +, 0), HEX_DBL(-, 1, a6f9c378, -, 5),
-      HEX_DBL(+, 1, 1687e151172cc, -, 40) },
-    { HEX_DBL(+, 1, 073260a47f7c6, +, 0), HEX_DBL(-, 1, 47aa07358, -, 5),
-      HEX_DBL(+, 1, 1f87e4a9cc778, -, 42) },
-    { HEX_DBL(+, 1, 05197f7d73404, +, 0), HEX_DBL(-, 1, d23afc498, -, 6),
-      HEX_DBL(+, 1, b183a6b628487, -, 40) },
-    { HEX_DBL(+, 1, 03091b51f5e1a, +, 0), HEX_DBL(-, 1, 16a21e21, -, 6),
-      HEX_DBL(+, 1, 7d75c58973ce5, -, 40) },
-    { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-    { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-    { HEX_DBL(+, 1, f44659e4a4271, -, 1), HEX_DBL(+, 1, 11cd1d51, -, 5),
-      HEX_DBL(+, 1, 9a0d857e2f4b2, -, 40) },
-    { HEX_DBL(+, 1, ecc07b301ecc, -, 1), HEX_DBL(+, 1, c4dfab908, -, 5),
-      HEX_DBL(+, 1, 55b53fce557fd, -, 40) },
-    { HEX_DBL(+, 1, e573ac901e573, -, 1), HEX_DBL(+, 1, 3aa2fdd26, -, 4),
-      HEX_DBL(+, 1, f1cb0c9532089, -, 40) },
-    { HEX_DBL(+, 1, de5d6e3f8868a, -, 1), HEX_DBL(+, 1, 918a16e46, -, 4),
-      HEX_DBL(+, 1, 9af0dcd65a6e1, -, 43) },
-    { HEX_DBL(+, 1, d77b654b82c33, -, 1), HEX_DBL(+, 1, e72ec117e, -, 4),
-      HEX_DBL(+, 1, a5b93c4ebe124, -, 40) },
-    { HEX_DBL(+, 1, d0cb58f6ec074, -, 1), HEX_DBL(+, 1, 1dcd19755, -, 3),
-      HEX_DBL(+, 1, 5be50e71ddc6c, -, 42) },
-    { HEX_DBL(+, 1, ca4b3055ee191, -, 1), HEX_DBL(+, 1, 476a9f983, -, 3),
-      HEX_DBL(+, 1, ee9a798719e7f, -, 40) },
-    { HEX_DBL(+, 1, c3f8f01c3f8f, -, 1), HEX_DBL(+, 1, 70742d4ef, -, 3),
-      HEX_DBL(+, 1, 3ff1352c1219c, -, 46) },
-    { HEX_DBL(+, 1, bdd2b899406f7, -, 1), HEX_DBL(+, 1, 98edd077e, -, 3),
-      HEX_DBL(+, 1, c383cd11362f4, -, 41) },
-    { HEX_DBL(+, 1, b7d6c3dda338b, -, 1), HEX_DBL(+, 1, c0db6cdd9, -, 3),
-      HEX_DBL(+, 1, 37bd85b1a824e, -, 41) },
-    { HEX_DBL(+, 1, b2036406c80d9, -, 1), HEX_DBL(+, 1, e840be74e, -, 3),
-      HEX_DBL(+, 1, a9334d525e1ec, -, 41) },
-    { HEX_DBL(+, 1, ac5701ac5701a, -, 1), HEX_DBL(+, 1, 0790adbb, -, 2),
-      HEX_DBL(+, 1, 8060bfb6a491, -, 41) },
-    { HEX_DBL(+, 1, a6d01a6d01a6d, -, 1), HEX_DBL(+, 1, 1ac05b2918, -, 2),
-      HEX_DBL(+, 1, c1c161471580a, -, 40) },
-    { HEX_DBL(+, 1, a16d3f97a4b01, -, 1), HEX_DBL(+, 1, 2db10fc4d8, -, 2),
-      HEX_DBL(+, 1, ab1aa62214581, -, 42) },
-    { HEX_DBL(+, 1, 9c2d14ee4a101, -, 1), HEX_DBL(+, 1, 406463b1b, -, 2),
-      HEX_DBL(+, 1, 12e95dbda6611, -, 44) },
-    { HEX_DBL(+, 1, 970e4f80cb872, -, 1), HEX_DBL(+, 1, 52dbdfc4c8, -, 2),
-      HEX_DBL(+, 1, 6b53fee511af, -, 42) },
-    { HEX_DBL(+, 1, 920fb49d0e228, -, 1), HEX_DBL(+, 1, 6518fe467, -, 2),
-      HEX_DBL(+, 1, eea7d7d7d1764, -, 40) },
-    { HEX_DBL(+, 1, 8d3018d3018d3, -, 1), HEX_DBL(+, 1, 771d2ba7e8, -, 2),
-      HEX_DBL(+, 1, ecefa8d4fab97, -, 40) },
-    { HEX_DBL(+, 1, 886e5f0abb049, -, 1), HEX_DBL(+, 1, 88e9c72e08, -, 2),
-      HEX_DBL(+, 1, 913ea3d33fd14, -, 41) },
-    { HEX_DBL(+, 1, 83c977ab2bedd, -, 1), HEX_DBL(+, 1, 9a802391e, -, 2),
-      HEX_DBL(+, 1, 197e845877c94, -, 41) },
-    { HEX_DBL(+, 1, 7f405fd017f4, -, 1), HEX_DBL(+, 1, abe18797f, -, 2),
-      HEX_DBL(+, 1, f4a52f8e8a81, -, 42) },
-    { HEX_DBL(+, 1, 7ad2208e0ecc3, -, 1), HEX_DBL(+, 1, bd0f2e9e78, -, 2),
-      HEX_DBL(+, 1, 031f4336644cc, -, 42) },
-    { HEX_DBL(+, 1, 767dce434a9b1, -, 1), HEX_DBL(+, 1, ce0a4923a, -, 2),
-      HEX_DBL(+, 1, 61f33c897020c, -, 40) },
-    { HEX_DBL(+, 1, 724287f46debc, -, 1), HEX_DBL(+, 1, ded3fd442, -, 2),
-      HEX_DBL(+, 1, b2632e830632, -, 41) },
-    { HEX_DBL(+, 1, 6e1f76b4337c6, -, 1), HEX_DBL(+, 1, ef6d673288, -, 2),
-      HEX_DBL(+, 1, 888ec245a0bf, -, 40) },
-    { HEX_DBL(+, 1, 6a13cd153729, -, 1), HEX_DBL(+, 1, ffd799a838, -, 2),
-      HEX_DBL(+, 1, fe6f3b2f5fc8e, -, 40) },
-    { HEX_DBL(+, 1, 661ec6a5122f9, -, 1), HEX_DBL(+, 1, 0809cf27f4, -, 1),
-      HEX_DBL(+, 1, 81eaa9ef284dd, -, 40) },
-    { HEX_DBL(+, 1, 623fa7701623f, -, 1), HEX_DBL(+, 1, 10113b153c, -, 1),
-      HEX_DBL(+, 1, 1d7b07d6b1143, -, 42) },
-    { HEX_DBL(+, 1, 5e75bb8d015e7, -, 1), HEX_DBL(+, 1, 18028cf728, -, 1),
-      HEX_DBL(+, 1, 76b100b1f6c6, -, 41) },
-    { HEX_DBL(+, 1, 5ac056b015ac, -, 1), HEX_DBL(+, 1, 1fde3d30e8, -, 1),
-      HEX_DBL(+, 1, 26faeb9870945, -, 45) },
-    { HEX_DBL(+, 1, 571ed3c506b39, -, 1), HEX_DBL(+, 1, 27a4c0585c, -, 1),
-      HEX_DBL(+, 1, 7f2c5344d762b, -, 42) }
+{HEX_DBL( +, 1, 5390948f40fea, +, 0 ), HEX_DBL( -, 1, a152f142a,  -, 2 ), HEX_DBL( +, 1, f93e27b43bd2c, -, 40 )},
+{HEX_DBL( +, 1, 5015015015015, +, 0 ), HEX_DBL( -, 1, 921800925,  -, 2 ), HEX_DBL( +, 1, 162432a1b8df7, -, 41 )},
+{HEX_DBL( +, 1, 4cab88725af6e, +, 0 ), HEX_DBL( -, 1, 8304d90c18, -, 2 ), HEX_DBL( +, 1, 80bb749056fe7, -, 40 )},
+{HEX_DBL( +, 1, 49539e3b2d066, +, 0 ), HEX_DBL( -, 1, 7418acebc,  -, 2 ), HEX_DBL( +, 1, ceac7f0607711, -, 43 )},
+{HEX_DBL( +, 1, 460cbc7f5cf9a, +, 0 ), HEX_DBL( -, 1, 6552b49988, -, 2 ), HEX_DBL( +, 1, d8913d0e89fa,  -, 42 )},
+{HEX_DBL( +, 1, 42d6625d51f86, +, 0 ), HEX_DBL( -, 1, 56b22e6b58, -, 2 ), HEX_DBL( +, 1, c7eaf515033a1, -, 44 )},
+{HEX_DBL( +, 1, 3fb013fb013fb, +, 0 ), HEX_DBL( -, 1, 48365e696,  -, 2 ), HEX_DBL( +, 1, 434adcde7edc7, -, 41 )},
+{HEX_DBL( +, 1, 3c995a47babe7, +, 0 ), HEX_DBL( -, 1, 39de8e156,  -, 2 ), HEX_DBL( +, 1, 8246f8e527754, -, 40 )},
+{HEX_DBL( +, 1, 3991c2c187f63, +, 0 ), HEX_DBL( -, 1, 2baa0c34c,  -, 2 ), HEX_DBL( +, 1, e1513c28e180d, -, 42 )},
+{HEX_DBL( +, 1, 3698df3de0747, +, 0 ), HEX_DBL( -, 1, 1d982c9d58, -, 2 ), HEX_DBL( +, 1, 63ea3fed4b8a2, -, 40 )},
+{HEX_DBL( +, 1, 33ae45b57bcb1, +, 0 ), HEX_DBL( -, 1, 0fa848045,  -, 2 ), HEX_DBL( +, 1, 32ccbacf1779b, -, 40 )},
+{HEX_DBL( +, 1, 30d190130d19,  +, 0 ), HEX_DBL( -, 1, 01d9bbcfa8, -, 2 ), HEX_DBL( +, 1, e2bfeb2b884aa, -, 42 )},
+{HEX_DBL( +, 1, 2e025c04b8097, +, 0 ), HEX_DBL( -, 1, e857d3d37,  -, 3 ), HEX_DBL( +, 1, d9309b4d2ea85, -, 40 )},
+{HEX_DBL( +, 1, 2b404ad012b4,  +, 0 ), HEX_DBL( -, 1, cd3c712d4,  -, 3 ), HEX_DBL( +, 1, ddf360962d7ab, -, 40 )},
+{HEX_DBL( +, 1, 288b01288b012, +, 0 ), HEX_DBL( -, 1, b2602497e,  -, 3 ), HEX_DBL( +, 1, 597f8a121640f, -, 40 )},
+{HEX_DBL( +, 1, 25e22708092f1, +, 0 ), HEX_DBL( -, 1, 97c1cb13d,  -, 3 ), HEX_DBL( +, 1, 02807d15580dc, -, 40 )},
+{HEX_DBL( +, 1, 23456789abcdf, +, 0 ), HEX_DBL( -, 1, 7d60496d,   -, 3 ), HEX_DBL( +, 1, 12ce913d7a827, -, 41 )},
+{HEX_DBL( +, 1, 20b470c67c0d8, +, 0 ), HEX_DBL( -, 1, 633a8bf44,  -, 3 ), HEX_DBL( +, 1, 0648bca9c96bd, -, 40 )},
+{HEX_DBL( +, 1, 1e2ef3b3fb874, +, 0 ), HEX_DBL( -, 1, 494f863b9,  -, 3 ), HEX_DBL( +, 1, 066fceb89b0eb, -, 42 )},
+{HEX_DBL( +, 1, 1bb4a4046ed29, +, 0 ), HEX_DBL( -, 1, 2f9e32d5c,  -, 3 ), HEX_DBL( +, 1, 17b8b6c4f846b, -, 46 )},
+{HEX_DBL( +, 1, 19453808ca29c, +, 0 ), HEX_DBL( -, 1, 162593187,  -, 3 ), HEX_DBL( +, 1, 2c83506452154, -, 42 )},
+{HEX_DBL( +, 1, 16e0689427378, +, 0 ), HEX_DBL( -, 1, f9c95dc1e,  -, 4 ), HEX_DBL( +, 1, dd5d2183150f3, -, 41 )},
+{HEX_DBL( +, 1, 1485f0e0acd3b, +, 0 ), HEX_DBL( -, 1, c7b528b72,  -, 4 ), HEX_DBL( +, 1, 0e43c4f4e619d, -, 40 )},
+{HEX_DBL( +, 1, 12358e75d3033, +, 0 ), HEX_DBL( -, 1, 960caf9ac,  -, 4 ), HEX_DBL( +, 1, 20fbfd5902a1e, -, 42 )},
+{HEX_DBL( +, 1, 0fef010fef01,  +, 0 ), HEX_DBL( -, 1, 64ce26c08,  -, 4 ), HEX_DBL( +, 1, 8ebeefb4ac467, -, 40 )},
+{HEX_DBL( +, 1, 0db20a88f4695, +, 0 ), HEX_DBL( -, 1, 33f7cde16,  -, 4 ), HEX_DBL( +, 1, 30b3312da7a7d, -, 40 )},
+{HEX_DBL( +, 1, 0b7e6ec259dc7, +, 0 ), HEX_DBL( -, 1, 0387efbcc,  -, 4 ), HEX_DBL( +, 1, 796f1632949c3, -, 40 )},
+{HEX_DBL( +, 1, 0953f39010953, +, 0 ), HEX_DBL( -, 1, a6f9c378,   -, 5 ), HEX_DBL( +, 1, 1687e151172cc, -, 40 )},
+{HEX_DBL( +, 1, 073260a47f7c6, +, 0 ), HEX_DBL( -, 1, 47aa07358,  -, 5 ), HEX_DBL( +, 1, 1f87e4a9cc778, -, 42 )},
+{HEX_DBL( +, 1, 05197f7d73404, +, 0 ), HEX_DBL( -, 1, d23afc498,  -, 6 ), HEX_DBL( +, 1, b183a6b628487, -, 40 )},
+{HEX_DBL( +, 1, 03091b51f5e1a, +, 0 ), HEX_DBL( -, 1, 16a21e21,   -, 6 ), HEX_DBL( +, 1, 7d75c58973ce5, -, 40 )},
+{HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,          +, 0 ), HEX_DBL( +, 0, 0,             +,  0 )},
+{HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,          +, 0 ), HEX_DBL( +, 0, 0,             +,  0 )},
+{HEX_DBL( +, 1, f44659e4a4271, -, 1 ), HEX_DBL( +, 1, 11cd1d51,   -, 5 ), HEX_DBL( +, 1, 9a0d857e2f4b2, -, 40 )},
+{HEX_DBL( +, 1, ecc07b301ecc,  -, 1 ), HEX_DBL( +, 1, c4dfab908,  -, 5 ), HEX_DBL( +, 1, 55b53fce557fd, -, 40 )},
+{HEX_DBL( +, 1, e573ac901e573, -, 1 ), HEX_DBL( +, 1, 3aa2fdd26,  -, 4 ), HEX_DBL( +, 1, f1cb0c9532089, -, 40 )},
+{HEX_DBL( +, 1, de5d6e3f8868a, -, 1 ), HEX_DBL( +, 1, 918a16e46,  -, 4 ), HEX_DBL( +, 1, 9af0dcd65a6e1, -, 43 )},
+{HEX_DBL( +, 1, d77b654b82c33, -, 1 ), HEX_DBL( +, 1, e72ec117e,  -, 4 ), HEX_DBL( +, 1, a5b93c4ebe124, -, 40 )},
+{HEX_DBL( +, 1, d0cb58f6ec074, -, 1 ), HEX_DBL( +, 1, 1dcd19755,  -, 3 ), HEX_DBL( +, 1, 5be50e71ddc6c, -, 42 )},
+{HEX_DBL( +, 1, ca4b3055ee191, -, 1 ), HEX_DBL( +, 1, 476a9f983,  -, 3 ), HEX_DBL( +, 1, ee9a798719e7f, -, 40 )},
+{HEX_DBL( +, 1, c3f8f01c3f8f,  -, 1 ), HEX_DBL( +, 1, 70742d4ef,  -, 3 ), HEX_DBL( +, 1, 3ff1352c1219c, -, 46 )},
+{HEX_DBL( +, 1, bdd2b899406f7, -, 1 ), HEX_DBL( +, 1, 98edd077e,  -, 3 ), HEX_DBL( +, 1, c383cd11362f4, -, 41 )},
+{HEX_DBL( +, 1, b7d6c3dda338b, -, 1 ), HEX_DBL( +, 1, c0db6cdd9,  -, 3 ), HEX_DBL( +, 1, 37bd85b1a824e, -, 41 )},
+{HEX_DBL( +, 1, b2036406c80d9, -, 1 ), HEX_DBL( +, 1, e840be74e,  -, 3 ), HEX_DBL( +, 1, a9334d525e1ec, -, 41 )},
+{HEX_DBL( +, 1, ac5701ac5701a, -, 1 ), HEX_DBL( +, 1, 0790adbb,   -, 2 ), HEX_DBL( +, 1, 8060bfb6a491,  -, 41 )},
+{HEX_DBL( +, 1, a6d01a6d01a6d, -, 1 ), HEX_DBL( +, 1, 1ac05b2918, -, 2 ), HEX_DBL( +, 1, c1c161471580a, -, 40 )},
+{HEX_DBL( +, 1, a16d3f97a4b01, -, 1 ), HEX_DBL( +, 1, 2db10fc4d8, -, 2 ), HEX_DBL( +, 1, ab1aa62214581, -, 42 )},
+{HEX_DBL( +, 1, 9c2d14ee4a101, -, 1 ), HEX_DBL( +, 1, 406463b1b,  -, 2 ), HEX_DBL( +, 1, 12e95dbda6611, -, 44 )},
+{HEX_DBL( +, 1, 970e4f80cb872, -, 1 ), HEX_DBL( +, 1, 52dbdfc4c8, -, 2 ), HEX_DBL( +, 1, 6b53fee511af,  -, 42 )},
+{HEX_DBL( +, 1, 920fb49d0e228, -, 1 ), HEX_DBL( +, 1, 6518fe467,  -, 2 ), HEX_DBL( +, 1, eea7d7d7d1764, -, 40 )},
+{HEX_DBL( +, 1, 8d3018d3018d3, -, 1 ), HEX_DBL( +, 1, 771d2ba7e8, -, 2 ), HEX_DBL( +, 1, ecefa8d4fab97, -, 40 )},
+{HEX_DBL( +, 1, 886e5f0abb049, -, 1 ), HEX_DBL( +, 1, 88e9c72e08, -, 2 ), HEX_DBL( +, 1, 913ea3d33fd14, -, 41 )},
+{HEX_DBL( +, 1, 83c977ab2bedd, -, 1 ), HEX_DBL( +, 1, 9a802391e,  -, 2 ), HEX_DBL( +, 1, 197e845877c94, -, 41 )},
+{HEX_DBL( +, 1, 7f405fd017f4,  -, 1 ), HEX_DBL( +, 1, abe18797f,  -, 2 ), HEX_DBL( +, 1, f4a52f8e8a81,  -, 42 )},
+{HEX_DBL( +, 1, 7ad2208e0ecc3, -, 1 ), HEX_DBL( +, 1, bd0f2e9e78, -, 2 ), HEX_DBL( +, 1, 031f4336644cc, -, 42 )},
+{HEX_DBL( +, 1, 767dce434a9b1, -, 1 ), HEX_DBL( +, 1, ce0a4923a,  -, 2 ), HEX_DBL( +, 1, 61f33c897020c, -, 40 )},
+{HEX_DBL( +, 1, 724287f46debc, -, 1 ), HEX_DBL( +, 1, ded3fd442,  -, 2 ), HEX_DBL( +, 1, b2632e830632,  -, 41 )},
+{HEX_DBL( +, 1, 6e1f76b4337c6, -, 1 ), HEX_DBL( +, 1, ef6d673288, -, 2 ), HEX_DBL( +, 1, 888ec245a0bf,  -, 40 )},
+{HEX_DBL( +, 1, 6a13cd153729,  -, 1 ), HEX_DBL( +, 1, ffd799a838, -, 2 ), HEX_DBL( +, 1, fe6f3b2f5fc8e, -, 40 )},
+{HEX_DBL( +, 1, 661ec6a5122f9, -, 1 ), HEX_DBL( +, 1, 0809cf27f4, -, 1 ), HEX_DBL( +, 1, 81eaa9ef284dd, -, 40 )},
+{HEX_DBL( +, 1, 623fa7701623f, -, 1 ), HEX_DBL( +, 1, 10113b153c, -, 1 ), HEX_DBL( +, 1, 1d7b07d6b1143, -, 42 )},
+{HEX_DBL( +, 1, 5e75bb8d015e7, -, 1 ), HEX_DBL( +, 1, 18028cf728, -, 1 ), HEX_DBL( +, 1, 76b100b1f6c6,  -, 41 )},
+{HEX_DBL( +, 1, 5ac056b015ac,  -, 1 ), HEX_DBL( +, 1, 1fde3d30e8, -, 1 ), HEX_DBL( +, 1, 26faeb9870945, -, 45 )},
+{HEX_DBL( +, 1, 571ed3c506b39, -, 1 ), HEX_DBL( +, 1, 27a4c0585c, -, 1 ), HEX_DBL( +, 1, 7f2c5344d762b, -, 42 )}
 static double __loglTable2[64][3] = {
-    { HEX_DBL(+, 1, 01fbe7f0a1be6, +, 0), HEX_DBL(-, 1, 6cf6ddd26112a, -, 7),
-      HEX_DBL(+, 1, 0725e5755e314, -, 60) },
-    { HEX_DBL(+, 1, 01eba93a97b12, +, 0), HEX_DBL(-, 1, 6155b1d99f603, -, 7),
-      HEX_DBL(+, 1, 4bcea073117f4, -, 60) },
-    { HEX_DBL(+, 1, 01db6c9029cd1, +, 0), HEX_DBL(-, 1, 55b54153137ff, -, 7),
-      HEX_DBL(+, 1, 21e8faccad0ec, -, 61) },
-    { HEX_DBL(+, 1, 01cb31f0f534c, +, 0), HEX_DBL(-, 1, 4a158c27245bd, -, 7),
-      HEX_DBL(+, 1, 1a5b7bfbf35d3, -, 60) },
-    { HEX_DBL(+, 1, 01baf95c9723c, +, 0), HEX_DBL(-, 1, 3e76923e3d678, -, 7),
-      HEX_DBL(+, 1, eee400eb5fe34, -, 62) },
-    { HEX_DBL(+, 1, 01aac2d2acee6, +, 0), HEX_DBL(-, 1, 32d85380ce776, -, 7),
-      HEX_DBL(+, 1, cbf7a513937bd, -, 61) },
-    { HEX_DBL(+, 1, 019a8e52d401e, +, 0), HEX_DBL(-, 1, 273acfd74be72, -, 7),
-      HEX_DBL(+, 1, 5c64599efa5e6, -, 60) },
-    { HEX_DBL(+, 1, 018a5bdca9e42, +, 0), HEX_DBL(-, 1, 1b9e072a2e65, -, 7),
-      HEX_DBL(+, 1, 364180e0a5d37, -, 60) },
-    { HEX_DBL(+, 1, 017a2b6fcc33e, +, 0), HEX_DBL(-, 1, 1001f961f3243, -, 7),
-      HEX_DBL(+, 1, 63d795746f216, -, 60) },
-    { HEX_DBL(+, 1, 0169fd0bd8a8a, +, 0), HEX_DBL(-, 1, 0466a6671bca4, -, 7),
-      HEX_DBL(+, 1, 4c99ff1907435, -, 60) },
-    { HEX_DBL(+, 1, 0159d0b06d129, +, 0), HEX_DBL(-, 1, f1981c445cd05, -, 8),
-      HEX_DBL(+, 1, 4bfff6366b723, -, 62) },
-    { HEX_DBL(+, 1, 0149a65d275a6, +, 0), HEX_DBL(-, 1, da6460f76ab8c, -, 8),
-      HEX_DBL(+, 1, 9c5404f47589c, -, 61) },
-    { HEX_DBL(+, 1, 01397e11a581b, +, 0), HEX_DBL(-, 1, c3321ab87f4ef, -, 8),
-      HEX_DBL(+, 1, c0da537429cea, -, 61) },
-    { HEX_DBL(+, 1, 012957cd85a28, +, 0), HEX_DBL(-, 1, ac014958c112c, -, 8),
-      HEX_DBL(+, 1, 000c2a1b595e3, -, 64) },
-    { HEX_DBL(+, 1, 0119339065ef7, +, 0), HEX_DBL(-, 1, 94d1eca95f67a, -, 8),
-      HEX_DBL(+, 1, d8d20b0564d5, -, 61) },
-    { HEX_DBL(+, 1, 01091159e4b3d, +, 0), HEX_DBL(-, 1, 7da4047b92b3e, -, 8),
-      HEX_DBL(+, 1, 6194a5d68cf2, -, 66) },
-    { HEX_DBL(+, 1, 00f8f129a0535, +, 0), HEX_DBL(-, 1, 667790a09bf77, -, 8),
-      HEX_DBL(+, 1, ca230e0bea645, -, 61) },
-    { HEX_DBL(+, 1, 00e8d2ff374a1, +, 0), HEX_DBL(-, 1, 4f4c90e9c4ead, -, 8),
-      HEX_DBL(+, 1, 1de3e7f350c1, -, 61) },
-    { HEX_DBL(+, 1, 00d8b6da482ce, +, 0), HEX_DBL(-, 1, 3823052860649, -, 8),
-      HEX_DBL(+, 1, 5789b4c5891b8, -, 64) },
-    { HEX_DBL(+, 1, 00c89cba71a8c, +, 0), HEX_DBL(-, 1, 20faed2dc9a9e, -, 8),
-      HEX_DBL(+, 1, 9e7c40f9839fd, -, 62) },
-    { HEX_DBL(+, 1, 00b8849f52834, +, 0), HEX_DBL(-, 1, 09d448cb65014, -, 8),
-      HEX_DBL(+, 1, 387e3e9b6d02, -, 62) },
-    { HEX_DBL(+, 1, 00a86e88899a4, +, 0), HEX_DBL(-, 1, e55e2fa53ebf1, -, 9),
-      HEX_DBL(+, 1, cdaa71fddfddf, -, 62) },
-    { HEX_DBL(+, 1, 00985a75b5e3f, +, 0), HEX_DBL(-, 1, b716b429dce0f, -, 9),
-      HEX_DBL(+, 1, 2f2af081367bf, -, 63) },
-    { HEX_DBL(+, 1, 00884866766ee, +, 0), HEX_DBL(-, 1, 88d21ec7a16d7, -, 9),
-      HEX_DBL(+, 1, fb95c228d6f16, -, 62) },
-    { HEX_DBL(+, 1, 0078385a6a61d, +, 0), HEX_DBL(-, 1, 5a906f219a9e8, -, 9),
-      HEX_DBL(+, 1, 18aff10a89f29, -, 64) },
-    { HEX_DBL(+, 1, 00682a5130fbe, +, 0), HEX_DBL(-, 1, 2c51a4dae87f1, -, 9),
-      HEX_DBL(+, 1, bcc7e33ddde3, -, 63) },
-    { HEX_DBL(+, 1, 00581e4a69944, +, 0), HEX_DBL(-, 1, fc2b7f2d782b1, -, 10),
-      HEX_DBL(+, 1, fe3ef3300a9fa, -, 64) },
-    { HEX_DBL(+, 1, 00481445b39a8, +, 0), HEX_DBL(-, 1, 9fb97df0b0b83, -, 10),
-      HEX_DBL(+, 1, 0d9a601f2f324, -, 65) },
-    { HEX_DBL(+, 1, 00380c42ae963, +, 0), HEX_DBL(-, 1, 434d4546227ae, -, 10),
-      HEX_DBL(+, 1, 0b9b6a5868f33, -, 63) },
-    { HEX_DBL(+, 1, 00280640fa271, +, 0), HEX_DBL(-, 1, cdcda8e930c19, -, 11),
-      HEX_DBL(+, 1, 3d424ab39f789, -, 64) },
-    { HEX_DBL(+, 1, 0018024036051, +, 0), HEX_DBL(-, 1, 150c558601261, -, 11),
-      HEX_DBL(+, 1, 285bb90327a0f, -, 64) },
-    { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-    { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-    { HEX_DBL(+, 1, ffa011fca0a1e, -, 1), HEX_DBL(+, 1, 14e5640c4197b, -, 10),
-      HEX_DBL(+, 1, 95728136ae401, -, 63) },
-    { HEX_DBL(+, 1, ff6031f064e07, -, 1), HEX_DBL(+, 1, cd61806bf532d, -, 10),
-      HEX_DBL(+, 1, 568a4f35d8538, -, 63) },
-    { HEX_DBL(+, 1, ff2061d532b9c, -, 1), HEX_DBL(+, 1, 42e34af550eda, -, 9),
-      HEX_DBL(+, 1, 8f69cee55fec, -, 62) },
-    { HEX_DBL(+, 1, fee0a1a513253, -, 1), HEX_DBL(+, 1, 9f0a5523902ea, -, 9),
-      HEX_DBL(+, 1, daec734b11615, -, 63) },
-    { HEX_DBL(+, 1, fea0f15a12139, -, 1), HEX_DBL(+, 1, fb25e19f11b26, -, 9),
-      HEX_DBL(+, 1, 8bafca62941da, -, 62) },
-    { HEX_DBL(+, 1, fe6150ee3e6d4, -, 1), HEX_DBL(+, 1, 2b9af9a28e282, -, 8),
-      HEX_DBL(+, 1, 0fd3674e1dc5b, -, 61) },
-    { HEX_DBL(+, 1, fe21c05baa109, -, 1), HEX_DBL(+, 1, 599d4678f24b9, -, 8),
-      HEX_DBL(+, 1, dafce1f09937b, -, 61) },
-    { HEX_DBL(+, 1, fde23f9c69cf9, -, 1), HEX_DBL(+, 1, 8799d8c046eb, -, 8),
-      HEX_DBL(+, 1, ffa0ce0bdd217, -, 65) },
-    { HEX_DBL(+, 1, fda2ceaa956e8, -, 1), HEX_DBL(+, 1, b590b1e5951ee, -, 8),
-      HEX_DBL(+, 1, 645a769232446, -, 62) },
-    { HEX_DBL(+, 1, fd636d8047a1f, -, 1), HEX_DBL(+, 1, e381d3555dbcf, -, 8),
-      HEX_DBL(+, 1, 882320d368331, -, 61) },
-    { HEX_DBL(+, 1, fd241c179e0cc, -, 1), HEX_DBL(+, 1, 08b69f3dccde, -, 7),
-      HEX_DBL(+, 1, 01ad5065aba9e, -, 61) },
-    { HEX_DBL(+, 1, fce4da6ab93e8, -, 1), HEX_DBL(+, 1, 1fa97a61dd298, -, 7),
-      HEX_DBL(+, 1, 84cd1f931ae34, -, 60) },
-    { HEX_DBL(+, 1, fca5a873bcb19, -, 1), HEX_DBL(+, 1, 36997bcc54a3f, -, 7),
-      HEX_DBL(+, 1, 1485e97eaee03, -, 60) },
-    { HEX_DBL(+, 1, fc66862ccec93, -, 1), HEX_DBL(+, 1, 4d86a43264a4f, -, 7),
-      HEX_DBL(+, 1, c75e63370988b, -, 61) },
-    { HEX_DBL(+, 1, fc27739018cfe, -, 1), HEX_DBL(+, 1, 6470f448fb09d, -, 7),
-      HEX_DBL(+, 1, d7361eeaed0a1, -, 65) },
-    { HEX_DBL(+, 1, fbe87097c6f5a, -, 1), HEX_DBL(+, 1, 7b586cc4c2523, -, 7),
-      HEX_DBL(+, 1, b3df952cc473c, -, 61) },
-    { HEX_DBL(+, 1, fba97d3e084dd, -, 1), HEX_DBL(+, 1, 923d0e5a21e06, -, 7),
-      HEX_DBL(+, 1, cf56c7b64ae5d, -, 62) },
-    { HEX_DBL(+, 1, fb6a997d0ecdc, -, 1), HEX_DBL(+, 1, a91ed9bd3df9a, -, 7),
-      HEX_DBL(+, 1, b957bdcd89e43, -, 61) },
-    { HEX_DBL(+, 1, fb2bc54f0f4ab, -, 1), HEX_DBL(+, 1, bffdcfa1f7fbb, -, 7),
-      HEX_DBL(+, 1, ea8cad9a21771, -, 62) },
-    { HEX_DBL(+, 1, faed00ae41783, -, 1), HEX_DBL(+, 1, d6d9f0bbee6f6, -, 7),
-      HEX_DBL(+, 1, 5762a9af89c82, -, 60) },
-    { HEX_DBL(+, 1, faae4b94dfe64, -, 1), HEX_DBL(+, 1, edb33dbe7d335, -, 7),
-      HEX_DBL(+, 1, 21e24fc245697, -, 62) },
-    { HEX_DBL(+, 1, fa6fa5fd27ff8, -, 1), HEX_DBL(+, 1, 0244dbae5ed05, -, 6),
-      HEX_DBL(+, 1, 12ef51b967102, -, 60) },
-    { HEX_DBL(+, 1, fa310fe15a078, -, 1), HEX_DBL(+, 1, 0daeaf24c3529, -, 6),
-      HEX_DBL(+, 1, 10d3cfca60b45, -, 59) },
-    { HEX_DBL(+, 1, f9f2893bb9192, -, 1), HEX_DBL(+, 1, 1917199bb66bc, -, 6),
-      HEX_DBL(+, 1, 6cf6034c32e19, -, 60) },
-    { HEX_DBL(+, 1, f9b412068b247, -, 1), HEX_DBL(+, 1, 247e1b6c615d5, -, 6),
-      HEX_DBL(+, 1, 42f0fffa229f7, -, 61) },
-    { HEX_DBL(+, 1, f975aa3c18ed6, -, 1), HEX_DBL(+, 1, 2fe3b4efcc5ad, -, 6),
-      HEX_DBL(+, 1, 70106136a8919, -, 60) },
-    { HEX_DBL(+, 1, f93751d6ae09b, -, 1), HEX_DBL(+, 1, 3b47e67edea93, -, 6),
-      HEX_DBL(+, 1, 38dd5a4f6959a, -, 59) },
-    { HEX_DBL(+, 1, f8f908d098df6, -, 1), HEX_DBL(+, 1, 46aab0725ea6c, -, 6),
-      HEX_DBL(+, 1, 821fc1e799e01, -, 60) },
-    { HEX_DBL(+, 1, f8bacf242aa2c, -, 1), HEX_DBL(+, 1, 520c1322f1e4e, -, 6),
-      HEX_DBL(+, 1, 129dcda3ad563, -, 60) },
-    { HEX_DBL(+, 1, f87ca4cbb755, -, 1), HEX_DBL(+, 1, 5d6c0ee91d2ab, -, 6),
-      HEX_DBL(+, 1, c5b190c04606e, -, 62) },
-    { HEX_DBL(+, 1, f83e89c195c25, -, 1), HEX_DBL(+, 1, 68caa41d448c3, -, 6),
-      HEX_DBL(+, 1, 4723441195ac9, -, 59) }
+{HEX_DBL( +, 1, 01fbe7f0a1be6, +, 0 ), HEX_DBL( -, 1, 6cf6ddd26112a, -,  7 ), HEX_DBL( +, 1, 0725e5755e314, -, 60 )},
+{HEX_DBL( +, 1, 01eba93a97b12, +, 0 ), HEX_DBL( -, 1, 6155b1d99f603, -,  7 ), HEX_DBL( +, 1, 4bcea073117f4, -, 60 )},
+{HEX_DBL( +, 1, 01db6c9029cd1, +, 0 ), HEX_DBL( -, 1, 55b54153137ff, -,  7 ), HEX_DBL( +, 1, 21e8faccad0ec, -, 61 )},
+{HEX_DBL( +, 1, 01cb31f0f534c, +, 0 ), HEX_DBL( -, 1, 4a158c27245bd, -,  7 ), HEX_DBL( +, 1, 1a5b7bfbf35d3, -, 60 )},
+{HEX_DBL( +, 1, 01baf95c9723c, +, 0 ), HEX_DBL( -, 1, 3e76923e3d678, -,  7 ), HEX_DBL( +, 1, eee400eb5fe34, -, 62 )},
+{HEX_DBL( +, 1, 01aac2d2acee6, +, 0 ), HEX_DBL( -, 1, 32d85380ce776, -,  7 ), HEX_DBL( +, 1, cbf7a513937bd, -, 61 )},
+{HEX_DBL( +, 1, 019a8e52d401e, +, 0 ), HEX_DBL( -, 1, 273acfd74be72, -,  7 ), HEX_DBL( +, 1, 5c64599efa5e6, -, 60 )},
+{HEX_DBL( +, 1, 018a5bdca9e42, +, 0 ), HEX_DBL( -, 1, 1b9e072a2e65,  -,  7 ), HEX_DBL( +, 1, 364180e0a5d37, -, 60 )},
+{HEX_DBL( +, 1, 017a2b6fcc33e, +, 0 ), HEX_DBL( -, 1, 1001f961f3243, -,  7 ), HEX_DBL( +, 1, 63d795746f216, -, 60 )},
+{HEX_DBL( +, 1, 0169fd0bd8a8a, +, 0 ), HEX_DBL( -, 1, 0466a6671bca4, -,  7 ), HEX_DBL( +, 1, 4c99ff1907435, -, 60 )},
+{HEX_DBL( +, 1, 0159d0b06d129, +, 0 ), HEX_DBL( -, 1, f1981c445cd05, -,  8 ), HEX_DBL( +, 1, 4bfff6366b723, -, 62 )},
+{HEX_DBL( +, 1, 0149a65d275a6, +, 0 ), HEX_DBL( -, 1, da6460f76ab8c, -,  8 ), HEX_DBL( +, 1, 9c5404f47589c, -, 61 )},
+{HEX_DBL( +, 1, 01397e11a581b, +, 0 ), HEX_DBL( -, 1, c3321ab87f4ef, -,  8 ), HEX_DBL( +, 1, c0da537429cea, -, 61 )},
+{HEX_DBL( +, 1, 012957cd85a28, +, 0 ), HEX_DBL( -, 1, ac014958c112c, -,  8 ), HEX_DBL( +, 1, 000c2a1b595e3, -, 64 )},
+{HEX_DBL( +, 1, 0119339065ef7, +, 0 ), HEX_DBL( -, 1, 94d1eca95f67a, -,  8 ), HEX_DBL( +, 1, d8d20b0564d5,  -, 61 )},
+{HEX_DBL( +, 1, 01091159e4b3d, +, 0 ), HEX_DBL( -, 1, 7da4047b92b3e, -,  8 ), HEX_DBL( +, 1, 6194a5d68cf2,  -, 66 )},
+{HEX_DBL( +, 1, 00f8f129a0535, +, 0 ), HEX_DBL( -, 1, 667790a09bf77, -,  8 ), HEX_DBL( +, 1, ca230e0bea645, -, 61 )},
+{HEX_DBL( +, 1, 00e8d2ff374a1, +, 0 ), HEX_DBL( -, 1, 4f4c90e9c4ead, -,  8 ), HEX_DBL( +, 1, 1de3e7f350c1,  -, 61 )},
+{HEX_DBL( +, 1, 00d8b6da482ce, +, 0 ), HEX_DBL( -, 1, 3823052860649, -,  8 ), HEX_DBL( +, 1, 5789b4c5891b8, -, 64 )},
+{HEX_DBL( +, 1, 00c89cba71a8c, +, 0 ), HEX_DBL( -, 1, 20faed2dc9a9e, -,  8 ), HEX_DBL( +, 1, 9e7c40f9839fd, -, 62 )},
+{HEX_DBL( +, 1, 00b8849f52834, +, 0 ), HEX_DBL( -, 1, 09d448cb65014, -,  8 ), HEX_DBL( +, 1, 387e3e9b6d02,  -, 62 )},
+{HEX_DBL( +, 1, 00a86e88899a4, +, 0 ), HEX_DBL( -, 1, e55e2fa53ebf1, -,  9 ), HEX_DBL( +, 1, cdaa71fddfddf, -, 62 )},
+{HEX_DBL( +, 1, 00985a75b5e3f, +, 0 ), HEX_DBL( -, 1, b716b429dce0f, -,  9 ), HEX_DBL( +, 1, 2f2af081367bf, -, 63 )},
+{HEX_DBL( +, 1, 00884866766ee, +, 0 ), HEX_DBL( -, 1, 88d21ec7a16d7, -,  9 ), HEX_DBL( +, 1, fb95c228d6f16, -, 62 )},
+{HEX_DBL( +, 1, 0078385a6a61d, +, 0 ), HEX_DBL( -, 1, 5a906f219a9e8, -,  9 ), HEX_DBL( +, 1, 18aff10a89f29, -, 64 )},
+{HEX_DBL( +, 1, 00682a5130fbe, +, 0 ), HEX_DBL( -, 1, 2c51a4dae87f1, -,  9 ), HEX_DBL( +, 1, bcc7e33ddde3,  -, 63 )},
+{HEX_DBL( +, 1, 00581e4a69944, +, 0 ), HEX_DBL( -, 1, fc2b7f2d782b1, -, 10 ), HEX_DBL( +, 1, fe3ef3300a9fa, -, 64 )},
+{HEX_DBL( +, 1, 00481445b39a8, +, 0 ), HEX_DBL( -, 1, 9fb97df0b0b83, -, 10 ), HEX_DBL( +, 1, 0d9a601f2f324, -, 65 )},
+{HEX_DBL( +, 1, 00380c42ae963, +, 0 ), HEX_DBL( -, 1, 434d4546227ae, -, 10 ), HEX_DBL( +, 1, 0b9b6a5868f33, -, 63 )},
+{HEX_DBL( +, 1, 00280640fa271, +, 0 ), HEX_DBL( -, 1, cdcda8e930c19, -, 11 ), HEX_DBL( +, 1, 3d424ab39f789, -, 64 )},
+{HEX_DBL( +, 1, 0018024036051, +, 0 ), HEX_DBL( -, 1, 150c558601261, -, 11 ), HEX_DBL( +, 1, 285bb90327a0f, -, 64 )},
+{HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,             +,  0 ), HEX_DBL( +, 0, 0,             +,  0 )},
+{HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,             +,  0 ), HEX_DBL( +, 0, 0,             +,  0 )},
+{HEX_DBL( +, 1, ffa011fca0a1e, -, 1 ), HEX_DBL( +, 1, 14e5640c4197b, -, 10 ), HEX_DBL( +, 1, 95728136ae401, -, 63 )},
+{HEX_DBL( +, 1, ff6031f064e07, -, 1 ), HEX_DBL( +, 1, cd61806bf532d, -, 10 ), HEX_DBL( +, 1, 568a4f35d8538, -, 63 )},
+{HEX_DBL( +, 1, ff2061d532b9c, -, 1 ), HEX_DBL( +, 1, 42e34af550eda, -,  9 ), HEX_DBL( +, 1, 8f69cee55fec,  -, 62 )},
+{HEX_DBL( +, 1, fee0a1a513253, -, 1 ), HEX_DBL( +, 1, 9f0a5523902ea, -,  9 ), HEX_DBL( +, 1, daec734b11615, -, 63 )},
+{HEX_DBL( +, 1, fea0f15a12139, -, 1 ), HEX_DBL( +, 1, fb25e19f11b26, -,  9 ), HEX_DBL( +, 1, 8bafca62941da, -, 62 )},
+{HEX_DBL( +, 1, fe6150ee3e6d4, -, 1 ), HEX_DBL( +, 1, 2b9af9a28e282, -,  8 ), HEX_DBL( +, 1, 0fd3674e1dc5b, -, 61 )},
+{HEX_DBL( +, 1, fe21c05baa109, -, 1 ), HEX_DBL( +, 1, 599d4678f24b9, -,  8 ), HEX_DBL( +, 1, dafce1f09937b, -, 61 )},
+{HEX_DBL( +, 1, fde23f9c69cf9, -, 1 ), HEX_DBL( +, 1, 8799d8c046eb,  -,  8 ), HEX_DBL( +, 1, ffa0ce0bdd217, -, 65 )},
+{HEX_DBL( +, 1, fda2ceaa956e8, -, 1 ), HEX_DBL( +, 1, b590b1e5951ee, -,  8 ), HEX_DBL( +, 1, 645a769232446, -, 62 )},
+{HEX_DBL( +, 1, fd636d8047a1f, -, 1 ), HEX_DBL( +, 1, e381d3555dbcf, -,  8 ), HEX_DBL( +, 1, 882320d368331, -, 61 )},
+{HEX_DBL( +, 1, fd241c179e0cc, -, 1 ), HEX_DBL( +, 1, 08b69f3dccde,  -,  7 ), HEX_DBL( +, 1, 01ad5065aba9e, -, 61 )},
+{HEX_DBL( +, 1, fce4da6ab93e8, -, 1 ), HEX_DBL( +, 1, 1fa97a61dd298, -,  7 ), HEX_DBL( +, 1, 84cd1f931ae34, -, 60 )},
+{HEX_DBL( +, 1, fca5a873bcb19, -, 1 ), HEX_DBL( +, 1, 36997bcc54a3f, -,  7 ), HEX_DBL( +, 1, 1485e97eaee03, -, 60 )},
+{HEX_DBL( +, 1, fc66862ccec93, -, 1 ), HEX_DBL( +, 1, 4d86a43264a4f, -,  7 ), HEX_DBL( +, 1, c75e63370988b, -, 61 )},
+{HEX_DBL( +, 1, fc27739018cfe, -, 1 ), HEX_DBL( +, 1, 6470f448fb09d, -,  7 ), HEX_DBL( +, 1, d7361eeaed0a1, -, 65 )},
+{HEX_DBL( +, 1, fbe87097c6f5a, -, 1 ), HEX_DBL( +, 1, 7b586cc4c2523, -,  7 ), HEX_DBL( +, 1, b3df952cc473c, -, 61 )},
+{HEX_DBL( +, 1, fba97d3e084dd, -, 1 ), HEX_DBL( +, 1, 923d0e5a21e06, -,  7 ), HEX_DBL( +, 1, cf56c7b64ae5d, -, 62 )},
+{HEX_DBL( +, 1, fb6a997d0ecdc, -, 1 ), HEX_DBL( +, 1, a91ed9bd3df9a, -,  7 ), HEX_DBL( +, 1, b957bdcd89e43, -, 61 )},
+{HEX_DBL( +, 1, fb2bc54f0f4ab, -, 1 ), HEX_DBL( +, 1, bffdcfa1f7fbb, -,  7 ), HEX_DBL( +, 1, ea8cad9a21771, -, 62 )},
+{HEX_DBL( +, 1, faed00ae41783, -, 1 ), HEX_DBL( +, 1, d6d9f0bbee6f6, -,  7 ), HEX_DBL( +, 1, 5762a9af89c82, -, 60 )},
+{HEX_DBL( +, 1, faae4b94dfe64, -, 1 ), HEX_DBL( +, 1, edb33dbe7d335, -,  7 ), HEX_DBL( +, 1, 21e24fc245697, -, 62 )},
+{HEX_DBL( +, 1, fa6fa5fd27ff8, -, 1 ), HEX_DBL( +, 1, 0244dbae5ed05, -,  6 ), HEX_DBL( +, 1, 12ef51b967102, -, 60 )},
+{HEX_DBL( +, 1, fa310fe15a078, -, 1 ), HEX_DBL( +, 1, 0daeaf24c3529, -,  6 ), HEX_DBL( +, 1, 10d3cfca60b45, -, 59 )},
+{HEX_DBL( +, 1, f9f2893bb9192, -, 1 ), HEX_DBL( +, 1, 1917199bb66bc, -,  6 ), HEX_DBL( +, 1, 6cf6034c32e19, -, 60 )},
+{HEX_DBL( +, 1, f9b412068b247, -, 1 ), HEX_DBL( +, 1, 247e1b6c615d5, -,  6 ), HEX_DBL( +, 1, 42f0fffa229f7, -, 61 )},
+{HEX_DBL( +, 1, f975aa3c18ed6, -, 1 ), HEX_DBL( +, 1, 2fe3b4efcc5ad, -,  6 ), HEX_DBL( +, 1, 70106136a8919, -, 60 )},
+{HEX_DBL( +, 1, f93751d6ae09b, -, 1 ), HEX_DBL( +, 1, 3b47e67edea93, -,  6 ), HEX_DBL( +, 1, 38dd5a4f6959a, -, 59 )},
+{HEX_DBL( +, 1, f8f908d098df6, -, 1 ), HEX_DBL( +, 1, 46aab0725ea6c, -,  6 ), HEX_DBL( +, 1, 821fc1e799e01, -, 60 )},
+{HEX_DBL( +, 1, f8bacf242aa2c, -, 1 ), HEX_DBL( +, 1, 520c1322f1e4e, -,  6 ), HEX_DBL( +, 1, 129dcda3ad563, -, 60 )},
+{HEX_DBL( +, 1, f87ca4cbb755,  -, 1 ), HEX_DBL( +, 1, 5d6c0ee91d2ab, -,  6 ), HEX_DBL( +, 1, c5b190c04606e, -, 62 )},
+{HEX_DBL( +, 1, f83e89c195c25, -, 1 ), HEX_DBL( +, 1, 68caa41d448c3, -,  6 ), HEX_DBL( +, 1, 4723441195ac9, -, 59 )}
 static double __loglTable3[8][3] = {
-    { HEX_DBL(+, 1, 000e00c40ab89, +, 0), HEX_DBL(-, 1, 4332be0032168, -, 12),
-      HEX_DBL(+, 1, a1003588d217a, -, 65) },
-    { HEX_DBL(+, 1, 000a006403e82, +, 0), HEX_DBL(-, 1, cdb2987366fcc, -, 13),
-      HEX_DBL(+, 1, 5c86001294bbc, -, 67) },
-    { HEX_DBL(+, 1, 0006002400d8, +, 0), HEX_DBL(-, 1, 150297c90fa6f, -, 13),
-      HEX_DBL(+, 1, 01fb4865fae32, -, 66) },
-    { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-    { HEX_DBL(+, 1, 0, +, 0), HEX_DBL(+, 0, 0, +, 0), HEX_DBL(+, 0, 0, +, 0) },
-    { HEX_DBL(+, 1, ffe8011ff280a, -, 1), HEX_DBL(+, 1, 14f8daf5e3d3b, -, 12),
-      HEX_DBL(+, 1, 3c933b4b6b914, -, 68) },
-    { HEX_DBL(+, 1, ffd8031fc184e, -, 1), HEX_DBL(+, 1, cd978c38042bb, -, 12),
-      HEX_DBL(+, 1, 10f8e642e66fd, -, 65) },
-    { HEX_DBL(+, 1, ffc8061f5492b, -, 1), HEX_DBL(+, 1, 43183c878274e, -, 11),
-      HEX_DBL(+, 1, 5885dd1eb6582, -, 65) }
+{HEX_DBL( +, 1, 000e00c40ab89, +, 0 ), HEX_DBL( -, 1, 4332be0032168, -, 12 ), HEX_DBL( +, 1, a1003588d217a, -, 65 )},
+{HEX_DBL( +, 1, 000a006403e82, +, 0 ), HEX_DBL( -, 1, cdb2987366fcc, -, 13 ), HEX_DBL( +, 1, 5c86001294bbc, -, 67 )},
+{HEX_DBL( +, 1, 0006002400d8,  +, 0 ), HEX_DBL( -, 1, 150297c90fa6f, -, 13 ), HEX_DBL( +, 1, 01fb4865fae32, -, 66 )},
+{HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,             +,  0 ), HEX_DBL( +, 0, 0,             +,  0 )},
+{HEX_DBL( +, 1, 0,             +, 0 ), HEX_DBL( +, 0, 0,             +,  0 ), HEX_DBL( +, 0, 0,             +,  0 )},
+{HEX_DBL( +, 1, ffe8011ff280a, -, 1 ), HEX_DBL( +, 1, 14f8daf5e3d3b, -, 12 ), HEX_DBL( +, 1, 3c933b4b6b914, -, 68 )},
+{HEX_DBL( +, 1, ffd8031fc184e, -, 1 ), HEX_DBL( +, 1, cd978c38042bb, -, 12 ), HEX_DBL( +, 1, 10f8e642e66fd, -, 65 )},
+{HEX_DBL( +, 1, ffc8061f5492b, -, 1 ), HEX_DBL( +, 1, 43183c878274e, -, 11 ), HEX_DBL( +, 1, 5885dd1eb6582, -, 65 )}
 static void __log2_ep(double *hi, double *lo, double x)
-    union {
-        uint64_t i;
-        double d;
-    } uu;
+    union { uint64_t i; double d; } uu;
     int m;
     double f = reference_frexp(x, &m);
     // bring f in [0.75, 1.5)
-    if (f < 0.75)
-    {
+    if( f < 0.75 ) {
         f *= 2.0;
         m -= 1;
     // index first table .... brings down to [1-2^-7, 1+2^6)
     uu.d = f;
-    int index =
-        (int)(((uu.i + ((uint64_t)1 << 51)) & 0x000fc00000000000ULL) >> 46);
+    int index = (int) (((uu.i + ((uint64_t) 1 << 51)) & 0x000fc00000000000ULL) >> 46);
     double r1 = __loglTable1[index][0];
     double logr1hi = __loglTable1[index][1];
     double logr1lo = __loglTable1[index][2];
-    // since log1rhi has 39 bits of precision, we have 14 bit in hand ... since
-    // |m| <= 1023 which needs 10bits at max, we can directly add m to log1hi
-    // without spilling
+    // since logr1hi has 39 bits of precision, we have 14 bits in hand ... since |m| <= 1023
+    // needs at most 10 bits, we can add m directly to logr1hi without spilling
     logr1hi += m;
-    // argument reduction needs to be in double-double since reduced argument
-    // will form the leading term of polynomial approximation which sets the
-    // precision we eventually achieve
+    // argument reduction needs to be in double-double since reduced argument will form the
+    // leading term of polynomial approximation which sets the precision we eventually achieve
     double zhi, zlo;
     MulD(&zhi, &zlo, r1, uu.d);
     // second index table .... brings down to [1-2^-12, 1+2^-11)
     uu.d = zhi;
-    index = (int)(((uu.i + ((uint64_t)1 << 46)) & 0x00007e0000000000ULL) >> 41);
+    index = (int) (((uu.i + ((uint64_t) 1 << 46)) & 0x00007e0000000000ULL) >> 41);
     double r2 = __loglTable2[index][0];
     double logr2hi = __loglTable2[index][1];
     double logr2lo = __loglTable2[index][2];
@@ -4466,12 +4170,11 @@
     // Actually reduction to 2^-11 would have been sufficient to calculate
     // second order term in polynomial in double rather than double-double, I
     // reduced it a bit more to make sure other systematic arithmetic errors
-    // are guarded against .... also this allow lower order product of leading
-    // polynomial term i.e. Ao_hi*z_lo + Ao_lo*z_hi to be done in double rather
-    // than double-double ... hence only term that needs to be done in
-    // double-double is Ao_hi*z_hi
+    // are guarded against .... this also allows the lower-order products of the leading polynomial
+    // term, i.e. Ao_hi*z_lo + Ao_lo*z_hi, to be done in double rather than double-double ...
+    // hence the only term that needs to be done in double-double is Ao_hi*z_hi
     uu.d = zhi;
-    index = (int)(((uu.i + ((uint64_t)1 << 41)) & 0x0000038000000000ULL) >> 39);
+    index = (int) (((uu.i + ((uint64_t) 1 << 41)) & 0x0000038000000000ULL) >> 39);
     double r3 = __loglTable3[index][0];
     double logr3hi = __loglTable3[index][1];
     double logr3lo = __loglTable3[index][2];
@@ -4483,36 +4186,34 @@
     AddDD(&log2hi, &log2lo, logr1hi, logr1lo, logr2hi, logr2lo);
     AddDD(&log2hi, &log2lo, logr3hi, logr3lo, log2hi, log2lo);
-    // final argument reduction .... zhi will be in [1-2^-14, 1+2^-13) after
-    // this
+    // final argument reduction .... zhi will be in [1-2^-14, 1+2^-13) after this
     MulDD(&zhi, &zlo, zhi, zlo, r3, 0.0);
-    // we dont need to do full double-double substract here. substracting 1.0
-    // for higher term is exact
+    // we don't need to do a full double-double subtraction here. subtracting 1.0 from the higher
+    // term is exact
     zhi = zhi - 1.0;
     // normalize
     AddD(&zhi, &zlo, zhi, zlo);
     // polynomail fitting to compute log2(1 + z) ... forth order polynomial fit
-    // to log2(1 + z)/z gives minimax absolute error of O(2^-76) with z in
-    // [-2^-14, 2^-13] log2(1 + z)/z = Ao + A1*z + A2*z^2 + A3*z^3 + A4*z^4
+    // to log2(1 + z)/z gives minimax absolute error of O(2^-76) with z in [-2^-14, 2^-13]
+    // log2(1 + z)/z = Ao + A1*z + A2*z^2 + A3*z^3 + A4*z^4
     // => log2(1 + z) = Ao*z + A1*z^2 + A2*z^3 + A3*z^4 + A4*z^5
-    // => log2(1 + z) = (Aohi + Aolo)*(zhi + zlo) + z^2*(A1 + A2*z + A3*z^2 +
-    // A4*z^3) since we are looking for at least 64 digits of precision and z in
-    // [-2^-14, 2^-13], final term can be done in double .... also Aolo*zhi +
-    // Aohi*zlo can be done in double .... Aohi*zhi needs to be done in
-    // double-double
+    // => log2(1 + z) = (Aohi + Aolo)*(zhi + zlo) + z^2*(A1 + A2*z + A3*z^2 + A4*z^3)
+    // since we are looking for at least 64 digits of precision and z in [-2^-14, 2^-13], final term
+    // can be done in double .... also Aolo*zhi + Aohi*zlo can be done in double ....
+    // Aohi*zhi needs to be done in double-double
-    double Aohi = HEX_DBL(+, 1, 71547652b82fe, +, 0);
-    double Aolo = HEX_DBL(+, 1, 777c9cbb675c, -, 56);
+    double Aohi = HEX_DBL( +, 1, 71547652b82fe, +, 0 );
+    double Aolo = HEX_DBL( +, 1, 777c9cbb675c, -, 56 );
     double y;
-    y = HEX_DBL(+, 1, 276d2736fade7, -, 2);
-    y = HEX_DBL(-, 1, 7154765782df1, -, 2) + y * zhi;
-    y = HEX_DBL(+, 1, ec709dc3a0f67, -, 2) + y * zhi;
-    y = HEX_DBL(-, 1, 71547652b82fe, -, 1) + y * zhi;
-    double zhisq = zhi * zhi;
-    y = y * zhisq;
-    y = y + zhi * Aolo;
-    y = y + zlo * Aohi;
+    y = HEX_DBL( +, 1, 276d2736fade7, -, 2 );
+    y = HEX_DBL( -, 1, 7154765782df1, -, 2 ) + y*zhi;
+    y = HEX_DBL( +, 1, ec709dc3a0f67, -, 2 ) + y*zhi;
+    y = HEX_DBL( -, 1, 71547652b82fe, -, 1 ) + y*zhi;
+    double zhisq = zhi*zhi;
+    y = y*zhisq;
+    y = y + zhi*Aolo;
+    y = y + zlo*Aohi;
     MulD(&zhi, &zlo, Aohi, zhi);
     AddDD(&zhi, &zlo, zhi, zlo, y, 0.0);
@@ -4522,8 +4223,10 @@
     *lo = zlo;
-long double reference_powl(long double x, long double y)
+long double reference_powl( long double x, long double y )
     // this will be used for testing doubles i.e. arguments will
     // be doubles so cast the input back to double ... returned
     // result will be long double though .... > 53 bits of precision
@@ -4537,163 +4240,174 @@
     // causes errors. So we need to tread y as long double and convert it
     // to hi, lo doubles when performing y*log2(x).
-    static const double neg_epsilon = HEX_DBL(+, 1, 0, +, 53);
+//    double x = (double) xx;
+//    double y = (double) yy;
-    // if x = 1, return x for any y, even NaN
-    if (x == 1.0) return x;
+    static const double neg_epsilon = HEX_DBL( +, 1, 0, +, 53 );
-    // if y == 0, return 1 for any x, even NaN
-    if (y == 0.0) return 1.0L;
+    //if x = 1, return x for any y, even NaN
+    if( x == 1.0 )
+        return x;
-    // get NaNs out of the way
-    if (x != x || y != y) return x + y;
+    //if y == 0, return 1 for any x, even NaN
+    if( y == 0.0 )
+        return 1.0L;
-    // do the work required to sort out edge cases
-    double fabsy = reference_fabs(y);
-    double fabsx = reference_fabs(x);
-    double iy = reference_rint(
-        fabsy); // we do round to nearest here so that |fy| <= 0.5
-    if (iy > fabsy) // convert nearbyint to floor
+    //get NaNs out of the way
+    if( x != x  || y != y )
+        return x + y;
+    //do the work required to sort out edge cases
+    double fabsy = reference_fabs( y );
+    double fabsx = reference_fabs( x );
+    double iy = reference_rint( fabsy );            //we do round to nearest here so that |fy| <= 0.5
+    if( iy > fabsy )//convert nearbyint to floor
         iy -= 1.0;
     int isOddInt = 0;
-    if (fabsy == iy && !reference_isinf(fabsy) && iy < neg_epsilon)
-        isOddInt = (int)(iy - 2.0 * rint(0.5 * iy)); // might be 0, -1, or 1
+    if( fabsy == iy && !reference_isinf(fabsy) && iy < neg_epsilon )
+        isOddInt =     (int) (iy - 2.0 * rint( 0.5 * iy ));        //might be 0, -1, or 1
-    /// test a few more edge cases
-    // deal with x == 0 cases
-    if (x == 0.0)
+    ///test a few more edge cases
+    //deal with x == 0 cases
+    if( x == 0.0 )
-        if (!isOddInt) x = 0.0;
+        if( ! isOddInt )
+            x = 0.0;
-        if (y < 0) x = 1.0 / x;
+        if( y < 0 )
+            x = 1.0/ x;
         return x;
-    // x == +-Inf cases
-    if (isinf(fabsx))
+    //x == +-Inf cases
+    if( isinf(fabsx) )
-        if (x < 0)
+        if( x < 0 )
-            if (isOddInt)
+            if( isOddInt )
-                if (y < 0)
+                if( y < 0 )
                     return -0.0;
                     return -INFINITY;
-                if (y < 0)
+                if( y < 0 )
                     return 0.0;
                     return INFINITY;
-        if (y < 0) return 0;
+        if( y < 0 )
+            return 0;
         return INFINITY;
-    // y = +-inf cases
-    if (isinf(fabsy))
+    //y = +-inf cases
+    if( isinf(fabsy) )
-        if (x == -1) return 1;
+        if( x == -1 )
+            return 1;
-        if (y < 0)
+        if( y < 0 )
-            if (fabsx < 1) return INFINITY;
+            if( fabsx < 1 )
+                return INFINITY;
             return 0;
-        if (fabsx < 1) return 0;
+        if( fabsx < 1 )
+            return 0;
         return INFINITY;
     // x < 0 and y non integer case
-    if (x < 0 && iy != fabsy)
+    if( x < 0 && iy != fabsy )
-        // return nan;
+        //return nan;
         return cl_make_nan();
-    // speedy resolution of sqrt and reciprocal sqrt
-    if (fabsy == 0.5)
+    //speedy resolution of sqrt and reciprocal sqrt
+    if( fabsy == 0.5 )
-        long double xl = sqrtl(x);
-        if (y < 0) xl = 1.0 / xl;
+        long double xl = sqrtl( x );
+        if( y < 0 )
+            xl = 1.0/ xl;
         return xl;
     double log2x_hi, log2x_lo;
-    // extended precision log .... accurate to at least 64-bits + couple of
-    // guard bits
+    // extended precision log .... accurate to at least 64-bits + couple of guard bits
     __log2_ep(&log2x_hi, &log2x_lo, fabsx);
     double ylog2x_hi, ylog2x_lo;
-    double y_hi = (double)y;
-    double y_lo = (double)(y - (long double)y_hi);
+    double y_hi = (double) y;
+    double y_lo = (double) ( y - (long double) y_hi);
     // compute product of y*log2(x)
     // scale to avoid overflow in double-double multiplication
-    if (reference_fabs(y) > HEX_DBL(+, 1, 0, +, 970))
-    {
+    if( reference_fabs( y ) > HEX_DBL( +, 1, 0, +, 970 ) ) {
         y_hi = reference_ldexp(y_hi, -53);
         y_lo = reference_ldexp(y_lo, -53);
     MulDD(&ylog2x_hi, &ylog2x_lo, log2x_hi, log2x_lo, y_hi, y_lo);
-    if (fabs(y) > HEX_DBL(+, 1, 0, +, 970))
-    {
+    if( fabs( y ) > HEX_DBL( +, 1, 0, +, 970 ) ) {
         ylog2x_hi = reference_ldexp(ylog2x_hi, 53);
         ylog2x_lo = reference_ldexp(ylog2x_lo, 53);
     long double powxy;
-    if (isinf(ylog2x_hi) || (reference_fabs(ylog2x_hi) > 2200))
-    {
-        powxy =
-            reference_signbit(ylog2x_hi) ? HEX_DBL(+, 0, 0, +, 0) : INFINITY;
-    }
-    else
-    {
+    if(isinf(ylog2x_hi) || (reference_fabs(ylog2x_hi) > 2200)) {
+        powxy = reference_signbit(ylog2x_hi) ? HEX_DBL( +, 0, 0, +, 0 ) : INFINITY;
+    } else {
         // separate integer + fractional part
         long int m = lrint(ylog2x_hi);
         AddDD(&ylog2x_hi, &ylog2x_lo, ylog2x_hi, ylog2x_lo, -m, 0.0);
         // revert to long double arithemtic
-        long double ylog2x = (long double)ylog2x_hi + (long double)ylog2x_lo;
-        long double tmp = reference_exp2l(ylog2x);
+        long double ylog2x = (long double) ylog2x_hi + (long double) ylog2x_lo;
+        long double tmp = reference_exp2l( ylog2x );
         powxy = reference_scalblnl(tmp, m);
     // if y is odd integer and x is negative, reverse sign
-    if (isOddInt & reference_signbit(x)) powxy = -powxy;
+    if( isOddInt & reference_signbit(x))
+        powxy = -powxy;
     return powxy;
 double reference_nextafter(double xx, double yy)
-    float x = (float)xx;
-    float y = (float)yy;
+    float x = (float) xx;
+    float y = (float) yy;
     // take care of nans
-    if (x != x) return x;
+    if( x != x )
+        return x;
-    if (y != y) return y;
+    if( y != y )
+        return y;
-    if (x == y) return y;
+    if( x == y )
+        return y;
     int32f_t a, b;
-    a.f = x;
-    b.f = y;
+    a.f  = x;
+    b.f  = y;
-    if (a.i & 0x80000000) a.i = 0x80000000 - a.i;
-    if (b.i & 0x80000000) b.i = 0x80000000 - b.i;
+    if( a.i & 0x80000000 )
+        a.i = 0x80000000 - a.i;
+    if(b.i & 0x80000000 )
+        b.i = 0x80000000 - b.i;
     a.i += (a.i < b.i) ? 1 : -1;
-    a.i = (a.i < 0) ? (cl_int)0x80000000 - a.i : a.i;
+    a.i = (a.i < 0) ? (cl_int) 0x80000000 - a.i : a.i;
     return a.f;
@@ -4701,28 +4415,33 @@
 long double reference_nextafterl(long double xx, long double yy)
-    double x = (double)xx;
-    double y = (double)yy;
+    double x = (double) xx;
+    double y = (double) yy;
     // take care of nans
-    if (x != x) return x;
+    if( x != x )
+        return x;
-    if (y != y) return y;
+    if( y != y )
+        return y;
     int64d_t a, b;
-    a.d = x;
-    b.d = y;
+    a.d  = x;
+    b.d  = y;
     int64_t tmp = 0x8000000000000000LL;
-    if (a.l & tmp) a.l = tmp - a.l;
-    if (b.l & tmp) b.l = tmp - b.l;
+    if( a.l & tmp )
+        a.l = tmp - a.l;
+    if(b.l & tmp )
+        b.l = tmp - b.l;
-    // edge case. if (x == y) or (x = 0.0f and y = -0.0f) or (x = -0.0f and y =
-    // 0.0f) test needs to be done using integer rep because subnormals may be
-    // flushed to zero on some platforms
-    if (a.l == b.l) return y;
+    // edge case. if (x == y) or (x = 0.0f and y = -0.0f) or (x = -0.0f and y = 0.0f)
+    // test needs to be done using integer rep because
+    // subnormals may be flushed to zero on some platforms
+    if( a.l == b.l )
+        return y;
     a.l += (a.l < b.l) ? 1 : -1;
     a.l = (a.l < 0) ? tmp - a.l : a.l;
@@ -4732,108 +4451,112 @@
 double reference_fdim(double xx, double yy)
-    float x = (float)xx;
-    float y = (float)yy;
+    float x = (float) xx;
+    float y = (float) yy;
-    if (x != x) return x;
+    if( x != x )
+        return x;
-    if (y != y) return y;
+    if( y != y )
+        return y;
-    float r = (x > y) ? (float)reference_subtract(x, y) : 0.0f;
+    float r = ( x > y ) ? (float) reference_subtract( x, y) : 0.0f;
     return r;
 long double reference_fdiml(long double xx, long double yy)
-    double x = (double)xx;
-    double y = (double)yy;
+    double x = (double) xx;
+    double y = (double) yy;
-    if (x != x) return x;
+    if( x != x )
+        return x;
-    if (y != y) return y;
+    if( y != y )
+        return y;
-    double r = (x > y) ? (double)reference_subtractl(x, y) : 0.0;
+    double r = ( x > y ) ? (double) reference_subtractl(x, y) : 0.0;
     return r;
 double reference_remquo(double xd, double yd, int *n)
-    float xx = (float)xd;
-    float yy = (float)yd;
+    float xx = (float) xd;
+    float yy = (float) yd;
-    if (isnan(xx) || isnan(yy) || fabsf(xx) == INFINITY || yy == 0.0)
+    if( isnan(xx) || isnan(yy) ||
+        fabsf(xx) == INFINITY  ||
+        yy == 0.0 )
         *n = 0;
         return cl_make_nan();
-    if (fabsf(yy) == INFINITY || xx == 0.0f)
-    {
+    if( fabsf(yy) == INFINITY || xx == 0.0f ) {
         *n = 0;
         return xd;
-    if (fabsf(xx) == fabsf(yy))
-    {
+    if( fabsf(xx) == fabsf(yy) ) {
         *n = (xx == yy) ? 1 : -1;
-        return reference_signbit(xx) ? -0.0 : 0.0;
+        return reference_signbit( xx ) ? -0.0 : 0.0;
-    int signx = reference_signbit(xx) ? -1 : 1;
-    int signy = reference_signbit(yy) ? -1 : 1;
+    int signx = reference_signbit( xx ) ? -1 : 1;
+    int signy = reference_signbit( yy ) ? -1 : 1;
     int signn = (signx == signy) ? 1 : -1;
     float x = fabsf(xx);
     float y = fabsf(yy);
     int ex, ey;
-    ex = reference_ilogb(x);
-    ey = reference_ilogb(y);
+    ex = reference_ilogb( x );
+    ey = reference_ilogb( y );
     float xr = x;
     float yr = y;
     uint32_t q = 0;
-    if (ex - ey >= -1)
-    {
+    if(ex-ey >= -1) {
-        yr = (float)reference_ldexp(y, -ey);
-        xr = (float)reference_ldexp(x, -ex);
+        yr = (float) reference_ldexp( y, -ey );
+        xr = (float) reference_ldexp( x, -ex );
-        if (ex - ey >= 0)
-        {
+        if(ex-ey >= 0) {
             int i;
-            for (i = ex - ey; i > 0; i--)
-            {
+            for(i = ex-ey; i > 0; i--) {
                 q <<= 1;
-                if (xr >= yr)
-                {
+                if(xr >= yr) {
                     xr -= yr;
                     q += 1;
                 xr += xr;
             q <<= 1;
-            if (xr > yr)
-            {
+            if( xr > yr ) {
                 xr -= yr;
                 q += 1;
-        else // ex-ey = -1
-            xr = reference_ldexp(xr, ex - ey);
+        else //ex-ey = -1
+            xr = reference_ldexp(xr, ex-ey);
-    if ((yr < 2.0f * xr) || ((yr == 2.0f * xr) && (q & 0x00000001)))
-    {
+    if( (yr < 2.0f*xr) || ( (yr == 2.0f*xr) && (q & 0x00000001) ) ) {
         xr -= yr;
         q += 1;
-    if (ex - ey >= -1) xr = reference_ldexp(xr, ey);
+    if(ex-ey >= -1)
+        xr = reference_ldexp(xr, ey);
     int qout = q & 0x0000007f;
-    if (signn < 0) qout = -qout;
-    if (xx < 0.0) xr = -xr;
+    if( signn < 0)
+        qout = -qout;
+    if( xx < 0.0 )
+        xr = -xr;
     *n = qout;
@@ -4842,80 +4565,80 @@
 long double reference_remquol(long double xd, long double yd, int *n)
-    double xx = (double)xd;
-    double yy = (double)yd;
-    if (isnan(xx) || isnan(yy) || fabs(xx) == INFINITY || yy == 0.0)
+    double xx = (double) xd;
+    double yy = (double) yd;
+    if( isnan(xx) || isnan(yy) ||
+        fabs(xx) == INFINITY  ||
+        yy == 0.0 )
         *n = 0;
         return cl_make_nan();
-    if (reference_fabs(yy) == INFINITY || xx == 0.0)
-    {
+    if( reference_fabs(yy) == INFINITY || xx == 0.0 ) {
         *n = 0;
         return xd;
-    if (reference_fabs(xx) == reference_fabs(yy))
-    {
+    if( reference_fabs(xx) == reference_fabs(yy) ) {
         *n = (xx == yy) ? 1 : -1;
-        return reference_signbit(xx) ? -0.0 : 0.0;
+        return reference_signbit( xx ) ? -0.0 : 0.0;
-    int signx = reference_signbit(xx) ? -1 : 1;
-    int signy = reference_signbit(yy) ? -1 : 1;
+    int signx = reference_signbit( xx ) ? -1 : 1;
+    int signy = reference_signbit( yy ) ? -1 : 1;
     int signn = (signx == signy) ? 1 : -1;
     double x = reference_fabs(xx);
     double y = reference_fabs(yy);
     int ex, ey;
-    ex = reference_ilogbl(x);
-    ey = reference_ilogbl(y);
+    ex = reference_ilogbl( x );
+    ey = reference_ilogbl( y );
     double xr = x;
     double yr = y;
     uint32_t q = 0;
-    if (ex - ey >= -1)
-    {
-        yr = reference_ldexp(y, -ey);
-        xr = reference_ldexp(x, -ex);
+    if(ex-ey >= -1) {
+        yr = reference_ldexp( y, -ey );
+        xr = reference_ldexp( x, -ex );
         int i;
-        if (ex - ey >= 0)
-        {
-            for (i = ex - ey; i > 0; i--)
-            {
+        if(ex-ey >= 0) {
+            for(i = ex-ey; i > 0; i--) {
                 q <<= 1;
-                if (xr >= yr)
-                {
+                if(xr >= yr) {
                     xr -= yr;
                     q += 1;
                 xr += xr;
             q <<= 1;
-            if (xr > yr)
-            {
+            if( xr > yr ) {
                 xr -= yr;
                 q += 1;
-            xr = reference_ldexp(xr, ex - ey);
+            xr = reference_ldexp(xr, ex-ey);
-    if ((yr < 2.0 * xr) || ((yr == 2.0 * xr) && (q & 0x00000001)))
-    {
+    if( (yr < 2.0*xr) || ( (yr == 2.0*xr) && (q & 0x00000001) ) ) {
         xr -= yr;
         q += 1;
-    if (ex - ey >= -1) xr = reference_ldexp(xr, ey);
+    if(ex-ey >= -1)
+        xr = reference_ldexp(xr, ey);
     int qout = q & 0x0000007f;
-    if (signn < 0) qout = -qout;
-    if (xx < 0.0) xr = -xr;
+    if( signn < 0)
+        qout = -qout;
+    if( xx < 0.0 )
+        xr = -xr;
     *n = qout;
     return xr;
@@ -4923,27 +4646,27 @@
 static double reference_scalbn(double x, int n)
-    if (reference_isinf(x) || reference_isnan(x) || x == 0.0) return x;
+    if(reference_isinf(x) || reference_isnan(x) || x == 0.0)
+        return x;
     int bias = 1023;
-    union {
-        double d;
-        cl_long l;
-    } u;
-    u.d = (double)x;
+    union { double d; cl_long l; } u;
+    u.d = (double) x;
     int e = (int)((u.l & 0x7ff0000000000000LL) >> 52);
-    if (e == 0)
+    if(e == 0)
         u.l |= ((cl_long)1023 << 52);
         u.d -= 1.0;
         e = (int)((u.l & 0x7ff0000000000000LL) >> 52) - 1022;
     e += n;
-    if (e >= 2047 || n >= 2098) return reference_copysign(INFINITY, x);
-    if (e < -51 || n < -2097) return reference_copysign(0.0, x);
-    if (e <= 0)
+    if(e >= 2047 || n >= 2098 )
+        return reference_copysign(INFINITY, x);
+    if(e < -51 || n <-2097 )
+        return reference_copysign(0.0, x);
+    if(e <= 0)
-        bias += (e - 1);
+        bias += (e-1);
         e = 1;
     u.l &= 0x800fffffffffffffLL;
@@ -4956,26 +4679,26 @@
 static long double reference_scalblnl(long double x, long n)
 #if defined(__i386__) || defined(__x86_64__) // INTEL
-    union {
+    union
+    {
         long double d;
-        struct
-        {
-            cl_ulong m;
-            cl_ushort sexp;
-        } u;
-    } u;
+        struct{ cl_ulong m; cl_ushort sexp;}u;
+    }u;
     u.u.m = CL_LONG_MIN;
-    if (reference_isinf(x)) return x;
+    if ( reference_isinf(x) )
+        return x;
-    if (x == 0.0L || n < -2200) return reference_copysignl(0.0L, x);
+    if( x == 0.0L || n < -2200)
+        return reference_copysignl( 0.0L, x );
-    if (n > 2200) return reference_copysignl(INFINITY, x);
+    if( n > 2200 )
+        return reference_copysignl( INFINITY, x );
-    if (n < 0)
+    if( n < 0 )
         u.u.sexp = 0x3fff - 1022;
-        while (n <= -1022)
+        while( n <= -1022 )
             x *= u.d;
             n += 1022;
@@ -4985,10 +4708,10 @@
         return x;
-    if (n > 0)
+    if( n > 0 )
         u.u.sexp = 0x3fff + 1023;
-        while (n >= 1023)
+        while( n >= 1023 )
             x *= u.d;
             n -= 1023;
@@ -5003,27 +4726,27 @@
 #elif defined(__arm__) // ARM .. sizeof(long double) == sizeof(double)
 #if __DBL_MAX_EXP__ >= __LDBL_MAX_EXP__
-    if (reference_isinfl(x) || reference_isnanl(x)) return x;
+    if(reference_isinfl(x) || reference_isnanl(x))
+        return x;
     int bias = 1023;
-    union {
-        double d;
-        cl_long l;
-    } u;
-    u.d = (double)x;
+    union { double d; cl_long l; } u;
+    u.d = (double) x;
     int e = (int)((u.l & 0x7ff0000000000000LL) >> 52);
-    if (e == 0)
+    if(e == 0)
         u.l |= ((cl_long)1023 << 52);
         u.d -= 1.0;
         e = (int)((u.l & 0x7ff0000000000000LL) >> 52) - 1022;
     e += n;
-    if (e >= 2047) return reference_copysignl(INFINITY, x);
-    if (e < -51) return reference_copysignl(0.0, x);
-    if (e <= 0)
+    if(e >= 2047)
+        return reference_copysignl(INFINITY, x);
+    if(e < -51)
+        return reference_copysignl(0.0, x);
+    if(e <= 0)
-        bias += (e - 1);
+        bias += (e-1);
         e = 1;
     u.l &= 0x800fffffffffffffLL;
@@ -5033,255 +4756,284 @@
     return x * u.d;
-#else // PPC
+#else  // PPC
     return scalblnl(x, n);
-double reference_relaxed_exp(double x) { return reference_exp(x); }
+double reference_relaxed_exp( double x )
+  return reference_exp(x);
 double reference_exp(double x)
-    return reference_exp2(x * HEX_DBL(+, 1, 71547652b82fe, +, 0));
+  return reference_exp2( x * HEX_DBL( +, 1, 71547652b82fe, +, 0 ) );
 long double reference_expl(long double x)
 #if defined(__PPC__)
-    long double scale, bias;
+  long double scale, bias;
-    // The PPC double long version of expl fails to produce denorm results
-    // and instead generates a 0.0. Compensate for this limitation by
-    // computing expl as:
-    //     expl(x + 40) * expl(-40)
-    // Likewise, overflows can prematurely produce an infinity, so we
-    // compute expl as:
-    //     expl(x - 40) * expl(40)
-    scale = 1.0L;
-    bias = 0.0L;
-    if (x < -708.0L)
-    {
-        bias = 40.0;
-        scale = expl(-40.0L);
-    }
-    else if (x > 708.0L)
-    {
-        bias = -40.0L;
-        scale = expl(40.0L);
-    }
-    return expl(x + bias) * scale;
+  // The PPC double long version of expl fails to produce denorm results
+  // and instead generates a 0.0. Compensate for this limitation by
+  // computing expl as:
+  //     expl(x + 40) * expl(-40)
+  // Likewise, overflows can prematurely produce an infinity, so we
+  // compute expl as:
+  //     expl(x - 40) * expl(40)
+  scale = 1.0L;
+  bias = 0.0L;
+  if (x < -708.0L) {
+    bias = 40.0;
+    scale = expl(-40.0L);
+  } else if (x > 708.0L) {
+    bias = -40.0L;
+    scale = expl(40.0L);
+  }
+  return expl(x + bias) * scale;
-    return expl(x);
+    return expl( x );
-double reference_sinh(double x) { return sinh(x); }
+double reference_sinh(double x)
+    return sinh(x);
-long double reference_sinhl(long double x) { return sinhl(x); }
+long double reference_sinhl(long double x)
+    return sinhl(x);
 double reference_fmod(double x, double y)
-    if (x == 0.0 && fabs(y) > 0.0) return x;
+    if( x == 0.0 && fabs(y) > 0.0 )
+        return x;
-    if (fabs(x) == INFINITY || y == 0) return cl_make_nan();
+    if( fabs(x) == INFINITY || y == 0 )
+        return cl_make_nan();
-    if (fabs(y) == INFINITY) // we know x is finite from above
+    if( fabs(y) == INFINITY )    // we know x is finite from above
         return x;
 #if defined(_MSC_VER) && defined(_M_X64)
-    return fmod(x, y);
+    return fmod( x, y );
-    return fmodf((float)x, (float)y);
+    return fmodf( (float) x, (float) y );
 long double reference_fmodl(long double x, long double y)
-    if (x == 0.0L && fabsl(y) > 0.0L) return x;
-    if (fabsl(x) == INFINITY || y == 0.0L) return cl_make_nan();
-    if (fabsl(y) == INFINITY) // we know x is finite from above
+    if( x == 0.0L && fabsl(y) > 0.0L )
         return x;
-    return fmod((double)x, (double)y);
+    if( fabsl(x) == INFINITY || y == 0.0L )
+        return cl_make_nan();
+    if( fabsl(y) == INFINITY )    // we know x is finite from above
+        return x;
+    return fmod( (double) x, (double) y );
 double reference_modf(double x, double *n)
-    if (isnan(x))
-    {
+    if(isnan(x)) {
         *n = cl_make_nan();
         return cl_make_nan();
     float nr;
-    float yr = modff((float)x, &nr);
+    float yr = modff((float) x, &nr);
     *n = nr;
     return yr;
 long double reference_modfl(long double x, long double *n)
-    if (isnan(x))
-    {
+    if(isnan(x)) {
         *n = cl_make_nan();
         return cl_make_nan();
     double nr;
-    double yr = modf((double)x, &nr);
+    double yr = modf((double) x, &nr);
     *n = nr;
     return yr;
-long double reference_fractl(long double x, long double *ip)
+long double reference_fractl(long double x, long double *ip )
-    if (isnan(x))
-    {
+    if(isnan(x)) {
         *ip = cl_make_nan();
         return cl_make_nan();
     double i;
-    double f = modf((double)x, &i);
-    if (f < 0.0)
+    double f = modf((double) x, &i );
+    if( f < 0.0 )
         f = 1.0 + f;
         i -= 1.0;
-        if (f == 1.0) f = HEX_DBL(+, 1, fffffffffffff, -, 1);
+        if( f == 1.0 )
+            f = HEX_DBL( +, 1, fffffffffffff, -, 1 );
     *ip = i;
     return f;
-long double reference_fabsl(long double x) { return fabsl(x); }
-double reference_relaxed_log(double x)
+long double reference_fabsl(long double x)
-    return (float)reference_log((float)x);
+    return fabsl( x );
+double reference_relaxed_log( double x )
+  return (float)reference_log((float)x);
 double reference_log(double x)
-    if (x == 0.0) return -INFINITY;
+    if( x == 0.0 )
+        return -INFINITY;
-    if (x < 0.0) return cl_make_nan();
+    if( x < 0.0 )
+        return cl_make_nan();
-    if (isinf(x)) return INFINITY;
+    if( isinf(x) )
+        return INFINITY;
-    double log2Hi = HEX_DBL(+, 1, 62e42fefa39ef, -, 1);
+    double log2Hi = HEX_DBL( +, 1, 62e42fefa39ef, -, 1 );
     double logxHi, logxLo;
     __log2_ep(&logxHi, &logxLo, x);
-    return logxHi * log2Hi;
+    return logxHi*log2Hi;
 long double reference_logl(long double x)
-    if (x == 0.0) return -INFINITY;
+    if( x == 0.0 )
+        return -INFINITY;
-    if (x < 0.0) return cl_make_nan();
+    if( x < 0.0 )
+        return cl_make_nan();
-    if (isinf(x)) return INFINITY;
+    if( isinf(x) )
+        return INFINITY;
-    double log2Hi = HEX_DBL(+, 1, 62e42fefa39ef, -, 1);
-    double log2Lo = HEX_DBL(+, 1, abc9e3b39803f, -, 56);
+    double log2Hi = HEX_DBL( +, 1, 62e42fefa39ef, -, 1 );
+    double log2Lo = HEX_DBL( +, 1, abc9e3b39803f, -, 56 );
     double logxHi, logxLo;
     __log2_ep(&logxHi, &logxLo, x);
-    long double lg2 = (long double)log2Hi + (long double)log2Lo;
-    long double logx = (long double)logxHi + (long double)logxLo;
-    return logx * lg2;
+    //double rhi, rlo;
+    //MulDD(&rhi, &rlo, logxHi, logxLo, log2Hi, log2Lo);
+    //return (long double) rhi + (long double) rlo;
+    long double lg2 = (long double) log2Hi + (long double) log2Lo;
+    long double logx = (long double) logxHi + (long double) logxLo;
+    return logx*lg2;
-double reference_relaxed_pow(double x, double y)
-    return (float)reference_exp2(((float)y) * (float)reference_log2((float)x));
+double reference_relaxed_pow( double x, double y) {
+  return (float)reference_exp2( ((float)y) * (float)reference_log2((float)x));
-double reference_pow(double x, double y)
+double reference_pow( double x, double y )
-    static const double neg_epsilon = HEX_DBL(+, 1, 0, +, 53);
+    static const double neg_epsilon = HEX_DBL( +, 1, 0, +, 53 );
-    // if x = 1, return x for any y, even NaN
-    if (x == 1.0) return x;
+    //if x = 1, return x for any y, even NaN
+    if( x == 1.0 )
+        return x;
-    // if y == 0, return 1 for any x, even NaN
-    if (y == 0.0) return 1.0;
+    //if y == 0, return 1 for any x, even NaN
+    if( y == 0.0 )
+        return 1.0;
-    // get NaNs out of the way
-    if (x != x || y != y) return x + y;
+    //get NaNs out of the way
+    if( x != x  || y != y )
+        return x + y;
-    // do the work required to sort out edge cases
-    double fabsy = reference_fabs(y);
-    double fabsx = reference_fabs(x);
-    double iy = reference_rint(
-        fabsy); // we do round to nearest here so that |fy| <= 0.5
-    if (iy > fabsy) // convert nearbyint to floor
+    //do the work required to sort out edge cases
+    double fabsy = reference_fabs( y );
+    double fabsx = reference_fabs( x );
+    double iy = reference_rint( fabsy );            //we do round to nearest here so that |fy| <= 0.5
+    if( iy > fabsy )//convert nearbyint to floor
         iy -= 1.0;
     int isOddInt = 0;
-    if (fabsy == iy && !reference_isinf(fabsy) && iy < neg_epsilon)
-        isOddInt = (int)(iy - 2.0 * rint(0.5 * iy)); // might be 0, -1, or 1
+    if( fabsy == iy && !reference_isinf(fabsy) && iy < neg_epsilon )
+        isOddInt =     (int) (iy - 2.0 * rint( 0.5 * iy ));        //might be 0, -1, or 1
-    /// test a few more edge cases
-    // deal with x == 0 cases
-    if (x == 0.0)
+    ///test a few more edge cases
+    //deal with x == 0 cases
+    if( x == 0.0 )
-        if (!isOddInt) x = 0.0;
+        if( ! isOddInt )
+            x = 0.0;
-        if (y < 0) x = 1.0 / x;
+        if( y < 0 )
+            x = 1.0/ x;
         return x;
-    // x == +-Inf cases
-    if (isinf(fabsx))
+    //x == +-Inf cases
+    if( isinf(fabsx) )
-        if (x < 0)
+        if( x < 0 )
-            if (isOddInt)
+            if( isOddInt )
-                if (y < 0)
+                if( y < 0 )
                     return -0.0;
                     return -INFINITY;
-                if (y < 0)
+                if( y < 0 )
                     return 0.0;
                     return INFINITY;
-        if (y < 0) return 0;
+        if( y < 0 )
+            return 0;
         return INFINITY;
-    // y = +-inf cases
-    if (isinf(fabsy))
+    //y = +-inf cases
+    if( isinf(fabsy) )
-        if (x == -1) return 1;
+        if( x == -1 )
+            return 1;
-        if (y < 0)
+        if( y < 0 )
-            if (fabsx < 1) return INFINITY;
+            if( fabsx < 1 )
+                return INFINITY;
             return 0;
-        if (fabsx < 1) return 0;
+        if( fabsx < 1 )
+            return 0;
         return INFINITY;
     // x < 0 and y non integer case
-    if (x < 0 && iy != fabsy)
+    if( x < 0 && iy != fabsy )
-        // return nan;
+        //return nan;
         return cl_make_nan();
-    // speedy resolution of sqrt and reciprocal sqrt
-    if (fabsy == 0.5)
+    //speedy resolution of sqrt and reciprocal sqrt
+    if( fabsy == 0.5 )
-        long double xl = reference_sqrt(x);
-        if (y < 0) xl = 1.0 / xl;
+        long double xl = reference_sqrt( x );
+        if( y < 0 )
+            xl = 1.0/ xl;
         return xl;
@@ -5292,55 +5044,73 @@
     return isOddInt ? reference_copysignd(result, x) : result;
-double reference_sqrt(double x) { return sqrt(x); }
+double reference_sqrt(double x)
+    return sqrt(x);
-double reference_floor(double x) { return floorf((float)x); }
+double reference_floor(double x)
+    return floorf((float) x);
 double reference_ldexp(double value, int exponent)
 #ifdef __MINGW32__
-    /*
-     * ====================================================
-     * This function is from fdlibm:
-     *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
-     *
-     * Developed at SunSoft, a Sun Microsystems, Inc. business.
-     * Permission to use, copy, modify, and distribute this
-     * software is freely granted, provided that this notice
-     * is preserved.
-     * ====================================================
-     */
-    if (!finite(value) || value == 0.0) return value;
-    return scalbn(value, exponent);
+ * ====================================================
+ * This function is from fdlibm:
+ *   It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+    if(!finite(value)||value==0.0) return value;
+    return scalbn(value,exponent);
     return reference_scalbn(value, exponent);
-long double reference_ldexpl(long double x, int n) { return ldexpl(x, n); }
+long double reference_ldexpl(long double x, int n)
+    return ldexpl( x, n);
-long double reference_coshl(long double x) { return coshl(x); }
+long double reference_coshl(long double x)
+    return coshl(x);
-double reference_ceil(double x) { return ceilf((float)x); }
+double reference_ceil(double x)
+    return ceilf((float) x);
 long double reference_ceill(long double x)
-    if (x == 0.0 || reference_isinfl(x) || reference_isnanl(x)) return x;
+    if( x == 0.0 || reference_isinfl(x) || reference_isnanl(x) )
+        return x;
     long double absx = reference_fabsl(x);
-    if (absx >= HEX_LDBL(+, 1, 0, +, 52)) return x;
+    if( absx >= HEX_LDBL( +, 1, 0, +, 52 ) )
+        return x;
-    if (absx < 1.0)
+    if( absx < 1.0 )
-        if (x < 0.0)
+        if( x < 0.0 )
             return 0.0;
             return 1.0;
-    long double r = (long double)((cl_long)x);
+    long double r = (long double) ((cl_long) x);
-    if (x > 0.0 && r < x) r += 1.0;
+    if( x > 0.0 && r < x )
+        r += 1.0;
     return r;
@@ -5351,53 +5121,45 @@
     long double x2 = x * x;
     int i;
-    // Prepare a head + tail representation of PI in long double.  A good
-    // compiler should get rid of all of this work.
-    static const cl_ulong pi_bits[2] = {
-        0x3243F6A8885A308DULL, 0x313198A2E0370734ULL
-    }; // first 126 bits of pi
-       //
+    //Prepare a head + tail representation of PI in long double.  A good compiler should get rid of all of this work.
+    static const cl_ulong pi_bits[2] = { 0x3243F6A8885A308DULL, 0x313198A2E0370734ULL};  // first 126 bits of pi
     long double head, tail, temp;
 #if __LDBL_MANT_DIG__ >= 64
     // long double has 64-bits of precision or greater
-    temp = (long double)pi_bits[0] * 0x1.0p64L;
-    head = temp + (long double)pi_bits[1];
-    temp -= head; // rounding err rounding pi_bits[1] into head
-    tail = (long double)pi_bits[1] + temp;
-    head *= HEX_LDBL(+, 1, 0, -, 125);
-    tail *= HEX_LDBL(+, 1, 0, -, 125);
+    temp = (long double) pi_bits[0] * 0x1.0p64L;
+    head = temp + (long double) pi_bits[1];
+    temp -= head;           // rounding err rounding pi_bits[1] into head
+    tail = (long double) pi_bits[1] + temp;
+    head *= HEX_LDBL( +, 1, 0, -, 125 );
+    tail *= HEX_LDBL( +, 1, 0, -, 125 );
-    head = (long double)pi_bits[0];
-    tail =
-        (long double)((cl_long)pi_bits[0]
-                      - (cl_long)
-                          head); // residual part of pi_bits[0] after rounding
-    tail = tail * HEX_LDBL(+, 1, 0, +, 64) + (long double)pi_bits[1];
-    head *= HEX_LDBL(+, 1, 0, -, 61);
-    tail *= HEX_LDBL(+, 1, 0, -, 125);
+    head = (long double) pi_bits[0];
+    tail = (long double) ((cl_long) pi_bits[0] - (cl_long) head );       // residual part of pi_bits[0] after rounding
+    tail = tail * HEX_LDBL( +, 1, 0, +, 64 ) + (long double) pi_bits[1];
+    head *= HEX_LDBL( +, 1, 0, -, 61 );
+    tail *= HEX_LDBL( +, 1, 0, -, 125 );
     // oversize values and NaNs go to NaN
-    if (!(x2 <= 1.0)) return sqrtl(1.0L - x2);
+    if( ! (x2 <= 1.0) )
+        return sqrtl(1.0L - x2 );
     // deal with large |x|:
     //                                                      sqrt( 1 - x**2)
-    // acos(|x| > sqrt(0.5)) = 2 * atan( z );       z = -------------------- ;
-    // z in [0, sqrt(0.5)/(1+sqrt(0.5) = .4142135...]
+    // acos(|x| > sqrt(0.5)) = 2 * atan( z );       z = -------------------- ;      z in [0, sqrt(0.5)/(1+sqrt(0.5) = .4142135...]
     //                                                          1 + x
-    if (x2 > 0.5)
+    if( x2 > 0.5 )
         // we handle the x < 0 case as pi - acos(|x|)
-        long double sign = reference_copysignl(1.0L, x);
-        long double fabsx = reference_fabsl(x);
-        head -= head * sign; // x > 0 ? 0 : pi.hi
-        tail -= tail * sign; // x > 0 ? 0 : pi.low
+        long double sign = reference_copysignl( 1.0L, x );
+        long double fabsx = reference_fabsl( x );
+        head -= head * sign;        // x > 0 ? 0 : pi.hi
+        tail -= tail * sign;        // x > 0 ? 0 : pi.low
-        // z = sqrt( 1-x**2 ) / (1+x) = sqrt( (1-x)(1+x) / (1+x)**2 ) = sqrt(
-        // (1-x)/(1+x) )
-        long double z2 = (1.0L - fabsx) / (1.0L + fabsx); // z**2
+        // z = sqrt( 1-x**2 ) / (1+x) = sqrt( (1-x)(1+x) / (1+x)**2 ) = sqrt( (1-x)/(1+x) )
+        long double z2 = (1.0L - fabsx) / (1.0L + fabsx);   // z**2
         long double z = sign * sqrtl(z2);
         //                     atan(sqrt(q))
@@ -5407,41 +5169,29 @@
         // Define q = r*r, and solve for atan(r):
         //  atan(r) = (p(r) + 1) * r = rp(r) + r
-        static long double atan_coeffs[] = {
-            HEX_LDBL(-, b, 3f52e0c278293b3, -, 67),
-            HEX_LDBL(-, a, aaaaaaaaaaa95b8, -, 5),
-            HEX_LDBL(+, c, ccccccccc992407, -, 6),
-            HEX_LDBL(-, 9, 24924923024398, -, 6),
-            HEX_LDBL(+, e, 38e38d6f92c98f3, -, 7),
-            HEX_LDBL(-, b, a2e89bfb8393ec6, -, 7),
-            HEX_LDBL(+, 9, d89a9f574d412cb, -, 7),
-            HEX_LDBL(-, 8, 88580517884c547, -, 7),
-            HEX_LDBL(+, f, 0ab6756abdad408, -, 8),
-            HEX_LDBL(-, d, 56a5b07a2f15b49, -, 8),
-            HEX_LDBL(+, b, 72ab587e46d80b2, -, 8),
-            HEX_LDBL(-, 8, 62ea24bb5b2e636, -, 8),
-            HEX_LDBL(+, e, d67c16582123937, -, 10)
-        }; // minimax fit over [ 0x1.0p-52, 0.18]   Max error:
-           // 0x1.67ea5c184e5d9p-64
+        static long double atan_coeffs[] = { HEX_LDBL( -, b, 3f52e0c278293b3, -, 67 ), HEX_LDBL( -, a, aaaaaaaaaaa95b8, -, 5 ),
+                                             HEX_LDBL( +, c, ccccccccc992407, -,  6 ), HEX_LDBL( -, 9, 24924923024398,  -, 6 ),
+                                             HEX_LDBL( +, e, 38e38d6f92c98f3, -,  7 ), HEX_LDBL( -, b, a2e89bfb8393ec6, -, 7 ),
+                                             HEX_LDBL( +, 9, d89a9f574d412cb, -,  7 ), HEX_LDBL( -, 8, 88580517884c547, -, 7 ),
+                                             HEX_LDBL( +, f, 0ab6756abdad408, -,  8 ), HEX_LDBL( -, d, 56a5b07a2f15b49, -, 8 ),
+                                             HEX_LDBL( +, b, 72ab587e46d80b2, -,  8 ), HEX_LDBL( -, 8, 62ea24bb5b2e636, -, 8 ),
+                                             HEX_LDBL( +, e, d67c16582123937, -, 10 ) }; // minimax fit over [ 0x1.0p-52, 0.18]   Max error:  0x1.67ea5c184e5d9p-64
         // Calculate y = p(r)
-        const size_t atan_coeff_count =
-            sizeof(atan_coeffs) / sizeof(atan_coeffs[0]);
-        long double y = atan_coeffs[atan_coeff_count - 1];
-        for (i = (int)atan_coeff_count - 2; i >= 0; i--)
+        const size_t atan_coeff_count = sizeof( atan_coeffs ) / sizeof( atan_coeffs[0] );
+        long double y = atan_coeffs[ atan_coeff_count - 1];
+        for( i = (int)atan_coeff_count - 2; i >= 0; i-- )
             y = atan_coeffs[i] + y * z2;
-        z *= 2.0L; // fold in 2.0 for 2.0 * atan(z)
-        y *= z; // rp(r)
+        z *= 2.0L;   // fold in 2.0 for 2.0 * atan(z)
+        y *= z;      // rp(r)
         return head + ((y + tail) + z);
     // do |x| <= sqrt(0.5) here
-    //                                                     acos( sqrt(z) ) -
-    //                                                     PI/2
-    //  Piecewise minimax polynomial fits for p(z) = 1 +
-    //  ------------------------;
+    //                                                     acos( sqrt(z) ) - PI/2
+    //  Piecewise minimax polynomial fits for p(z) = 1 + ------------------------;
     //                                                            sqrt(z)
     //  Define z = x*x, and solve for acos(x) over x in  x >= 0:
@@ -5449,88 +5199,52 @@
     //      acos( sqrt(z) ) = acos(x) = x*(p(z)-1) + PI/2 = xp(x**2) - x + PI/2
     const long double coeffs[4][14] = {
-        { HEX_LDBL(-, a, fa7382e1f347974, -, 10),
-          HEX_LDBL(-, b, 4d5a992de1ac4da, -, 6),
-          HEX_LDBL(-, a, c526184bd558c17, -, 7),
-          HEX_LDBL(-, d, 9ed9b0346ec092a, -, 8),
-          HEX_LDBL(-, 9, dca410c1f04b1f, -, 8),
-          HEX_LDBL(-, f, 76e411ba9581ee5, -, 9),
-          HEX_LDBL(-, c, c71b00479541d8e, -, 9),
-          HEX_LDBL(-, a, f527a3f9745c9de, -, 9),
-          HEX_LDBL(-, 9, a93060051f48d14, -, 9),
-          HEX_LDBL(-, 8, b3d39ad70e06021, -, 9),
-          HEX_LDBL(-, f, f2ab95ab84f79c, -, 10),
-          HEX_LDBL(-, e, d1af5f5301ccfe4, -, 10),
-          HEX_LDBL(-, e, 1b53ba562f0f74a, -, 10),
-          HEX_LDBL(-, d, 6a3851330e15526, -,
-                   10) }, // x - 0.0625 in [ -0x1.fffffffffp-5, 0x1.0p-4 ]
-                          // Error: 0x1.97839bf07024p-76
+                                    { HEX_LDBL( -, a, fa7382e1f347974, -, 10 ), HEX_LDBL( -, b, 4d5a992de1ac4da, -,  6 ),
+                                      HEX_LDBL( -, a, c526184bd558c17, -,  7 ), HEX_LDBL( -, d, 9ed9b0346ec092a, -,  8 ),
+                                      HEX_LDBL( -, 9, dca410c1f04b1f,  -,  8 ), HEX_LDBL( -, f, 76e411ba9581ee5, -,  9 ),
+                                      HEX_LDBL( -, c, c71b00479541d8e, -,  9 ), HEX_LDBL( -, a, f527a3f9745c9de, -,  9 ),
+                                      HEX_LDBL( -, 9, a93060051f48d14, -,  9 ), HEX_LDBL( -, 8, b3d39ad70e06021, -,  9 ),
+                                      HEX_LDBL( -, f, f2ab95ab84f79c,  -, 10 ), HEX_LDBL( -, e, d1af5f5301ccfe4, -, 10 ),
+                                      HEX_LDBL( -, e, 1b53ba562f0f74a, -, 10 ), HEX_LDBL( -, d, 6a3851330e15526, -, 10 ) },  // x - 0.0625 in [ -0x1.fffffffffp-5, 0x1.0p-4 ]    Error: 0x1.97839bf07024p-76
-        { HEX_LDBL(-, 8, c2f1d638e4c1b48, -, 8),
-          HEX_LDBL(-, c, d47ac903c311c2c, -, 6),
-          HEX_LDBL(-, d, e020b2dabd5606a, -, 7),
-          HEX_LDBL(-, a, 086fafac220f16b, -, 7),
-          HEX_LDBL(-, 8, 55b5efaf6b86c3e, -, 7),
-          HEX_LDBL(-, f, 05c9774fed2f571, -, 8),
-          HEX_LDBL(-, e, 484a93f7f0fc772, -, 8),
-          HEX_LDBL(-, e, 1a32baef01626e4, -, 8),
-          HEX_LDBL(-, e, 528e525b5c9c73d, -, 8),
-          HEX_LDBL(-, e, ddd5d27ad49b2c8, -, 8),
-          HEX_LDBL(-, f, b3259e7ae10c6f, -, 8),
-          HEX_LDBL(-, 8, 68998170d5b19b7, -, 7),
-          HEX_LDBL(-, 9, 4468907f007727, -, 7),
-          HEX_LDBL(-, a, 2ad5e4906a8e7b3, -,
-                   7) }, // x - 0.1875 in [ -0x1.0p-4, 0x1.0p-4 ]    Error:
-                         // 0x1.647af70073457p-73
+                                    { HEX_LDBL( -, 8, c2f1d638e4c1b48, -,  8 ), HEX_LDBL( -, c, d47ac903c311c2c, -,  6 ),
+                                      HEX_LDBL( -, d, e020b2dabd5606a, -,  7 ), HEX_LDBL( -, a, 086fafac220f16b, -,  7 ),
+                                      HEX_LDBL( -, 8, 55b5efaf6b86c3e, -,  7 ), HEX_LDBL( -, f, 05c9774fed2f571, -,  8 ),
+                                      HEX_LDBL( -, e, 484a93f7f0fc772, -,  8 ), HEX_LDBL( -, e, 1a32baef01626e4, -,  8 ),
+                                      HEX_LDBL( -, e, 528e525b5c9c73d, -,  8 ), HEX_LDBL( -, e, ddd5d27ad49b2c8, -,  8 ),
+                                      HEX_LDBL( -, f, b3259e7ae10c6f,  -,  8 ), HEX_LDBL( -, 8, 68998170d5b19b7, -,  7 ),
+                                      HEX_LDBL( -, 9, 4468907f007727,  -,  7 ), HEX_LDBL( -, a, 2ad5e4906a8e7b3, -,  7 ) },// x - 0.1875 in [ -0x1.0p-4, 0x1.0p-4 ]    Error: 0x1.647af70073457p-73
-        { HEX_LDBL(-, f, a76585ad399e7ac, -, 8),
-          HEX_LDBL(-, e, d665b7dd504ca7c, -, 6),
-          HEX_LDBL(-, 9, 4c7c2402bd4bc33, -, 6),
-          HEX_LDBL(-, f, ba76b69074ff71c, -, 7),
-          HEX_LDBL(-, f, 58117784bdb6d5f, -, 7),
-          HEX_LDBL(-, 8, 22ddd8eef53227d, -, 6),
-          HEX_LDBL(-, 9, 1d1d3b57a63cdb4, -, 6),
-          HEX_LDBL(-, a, 9c4bdc40cca848, -, 6),
-          HEX_LDBL(-, c, b673b12794edb24, -, 6),
-          HEX_LDBL(-, f, 9290a06e31575bf, -, 6),
-          HEX_LDBL(-, 9, b4929c16aeb3d1f, -, 5),
-          HEX_LDBL(-, c, 461e725765a7581, -, 5),
-          HEX_LDBL(-, 8, 0a59654c98d9207, -, 4),
-          HEX_LDBL(-, a, 6de6cbd96c80562, -,
-                   4) }, // x - 0.3125 in [ -0x1.0p-4, 0x1.0p-4 ]   Error:
-                         // 0x1.b0246c304ce1ap-70
+                                    { HEX_LDBL( -, f, a76585ad399e7ac, -,  8 ), HEX_LDBL( -, e, d665b7dd504ca7c, -,  6 ),
+                                      HEX_LDBL( -, 9, 4c7c2402bd4bc33, -,  6 ), HEX_LDBL( -, f, ba76b69074ff71c, -,  7 ),
+                                      HEX_LDBL( -, f, 58117784bdb6d5f, -,  7 ), HEX_LDBL( -, 8, 22ddd8eef53227d, -,  6 ),
+                                      HEX_LDBL( -, 9, 1d1d3b57a63cdb4, -,  6 ), HEX_LDBL( -, a, 9c4bdc40cca848,  -,  6 ),
+                                      HEX_LDBL( -, c, b673b12794edb24, -,  6 ), HEX_LDBL( -, f, 9290a06e31575bf, -,  6 ),
+                                      HEX_LDBL( -, 9, b4929c16aeb3d1f, -,  5 ), HEX_LDBL( -, c, 461e725765a7581, -,  5 ),
+                                      HEX_LDBL( -, 8, 0a59654c98d9207, -,  4 ), HEX_LDBL( -, a, 6de6cbd96c80562, -,  4 ) }, // x - 0.3125 in [ -0x1.0p-4, 0x1.0p-4 ]   Error: 0x1.b0246c304ce1ap-70
-        { HEX_LDBL(-, b, dca8b0359f96342, -, 7),
-          HEX_LDBL(-, 8, cd2522fcde9823, -, 5),
-          HEX_LDBL(-, d, 2af9397b27ff74d, -, 6),
-          HEX_LDBL(-, d, 723f2c2c2409811, -, 6),
-          HEX_LDBL(-, f, ea8f8481ecc3cd1, -, 6),
-          HEX_LDBL(-, a, 43fd8a7a646b0b2, -, 5),
-          HEX_LDBL(-, e, 01b0bf63a4e8d76, -, 5),
-          HEX_LDBL(-, 9, f0b7096a2a7b4d, -, 4),
-          HEX_LDBL(-, e, 872e7c5a627ab4c, -, 4),
-          HEX_LDBL(-, a, dbd760a1882da48, -, 3),
-          HEX_LDBL(-, 8, 424e4dea31dd273, -, 2),
-          HEX_LDBL(-, c, c05d7730963e793, -, 2),
-          HEX_LDBL(-, a, 523d97197cd124a, -, 1),
-          HEX_LDBL(-, 8, 307ba943978aaee, +,
-                   0) } // x - 0.4375 in [ -0x1.0p-4, 0x1.0p-4 ]  Error:
-                        // 0x1.9ecff73da69c9p-66
-    };
+                                    { HEX_LDBL( -, b, dca8b0359f96342, -,  7 ), HEX_LDBL( -, 8, cd2522fcde9823,  -,  5 ),
+                                      HEX_LDBL( -, d, 2af9397b27ff74d, -,  6 ), HEX_LDBL( -, d, 723f2c2c2409811, -,  6 ),
+                                      HEX_LDBL( -, f, ea8f8481ecc3cd1, -,  6 ), HEX_LDBL( -, a, 43fd8a7a646b0b2, -,  5 ),
+                                      HEX_LDBL( -, e, 01b0bf63a4e8d76, -,  5 ), HEX_LDBL( -, 9, f0b7096a2a7b4d,  -,  4 ),
+                                      HEX_LDBL( -, e, 872e7c5a627ab4c, -,  4 ), HEX_LDBL( -, a, dbd760a1882da48, -,  3 ),
+                                      HEX_LDBL( -, 8, 424e4dea31dd273, -,  2 ), HEX_LDBL( -, c, c05d7730963e793, -,  2 ),
+                                      HEX_LDBL( -, a, 523d97197cd124a, -,  1 ), HEX_LDBL( -, 8, 307ba943978aaee, +,  0 ) } // x - 0.4375 in [ -0x1.0p-4, 0x1.0p-4 ]  Error: 0x1.9ecff73da69c9p-66
+                                 };
     const long double offsets[4] = { 0.0625, 0.1875, 0.3125, 0.4375 };
-    const size_t coeff_count = sizeof(coeffs[0]) / sizeof(coeffs[0][0]);
+    const size_t coeff_count = sizeof( coeffs[0] ) / sizeof( coeffs[0][0] );
-    // reduce the incoming values a bit so that they are in the range
-    // [-0x1.0p-4, 0x1.0p-4]
+    // reduce the incoming values a bit so that they are in the range [-0x1.0p-4, 0x1.0p-4]
     const long double *c;
     i = x2 * 8.0L;
     c = coeffs[i];
-    x2 -= offsets[i]; // exact
+    x2 -= offsets[i];       // exact
     // calcualte p(x2)
-    long double y = c[coeff_count - 1];
-    for (i = (int)coeff_count - 2; i >= 0; i--) y = c[i] + y * x2;
+    long double y = c[ coeff_count - 1];
+    for( i = (int)coeff_count - 2; i >= 0; i-- )
+        y = c[i] + y * x2;
     // xp(x2)
     y *= x;
@@ -5539,50 +5253,58 @@
     return head + ((y + tail) - x);
-double reference_relaxed_acos(double x) { return reference_acos(x); }
 double reference_log10(double x)
-    if (x == 0.0) return -INFINITY;
+    if( x == 0.0 )
+        return -INFINITY;
-    if (x < 0.0) return cl_make_nan();
+    if( x < 0.0 )
+        return cl_make_nan();
-    if (isinf(x)) return INFINITY;
+    if( isinf(x) )
+        return INFINITY;
-    double log2Hi = HEX_DBL(+, 1, 34413509f79fe, -, 2);
+    double log2Hi = HEX_DBL( +, 1, 34413509f79fe, -, 2 );
     double logxHi, logxLo;
     __log2_ep(&logxHi, &logxLo, x);
-    return logxHi * log2Hi;
+    return logxHi*log2Hi;
-double reference_relaxed_log10(double x) { return reference_log10(x); }
 long double reference_log10l(long double x)
-    if (x == 0.0) return -INFINITY;
+    if( x == 0.0 )
+        return -INFINITY;
-    if (x < 0.0) return cl_make_nan();
+    if( x < 0.0 )
+        return cl_make_nan();
-    if (isinf(x)) return INFINITY;
+    if( isinf(x) )
+        return INFINITY;
-    double log2Hi = HEX_DBL(+, 1, 34413509f79fe, -, 2);
-    double log2Lo = HEX_DBL(+, 1, e623e2566b02d, -, 55);
+    double log2Hi = HEX_DBL( +, 1, 34413509f79fe, -, 2 );
+    double log2Lo = HEX_DBL( +, 1, e623e2566b02d, -, 55 );
     double logxHi, logxLo;
     __log2_ep(&logxHi, &logxLo, x);
-    long double lg2 = (long double)log2Hi + (long double)log2Lo;
-    long double logx = (long double)logxHi + (long double)logxLo;
-    return logx * lg2;
+    //double rhi, rlo;
+    //MulDD(&rhi, &rlo, logxHi, logxLo, log2Hi, log2Lo);
+    //return (long double) rhi + (long double) rlo;
+    long double lg2 = (long double) log2Hi + (long double) log2Lo;
+    long double logx = (long double) logxHi + (long double) logxLo;
+    return logx*lg2;
-double reference_acos(double x) { return acos(x); }
+double reference_acos(double x)
+    return acos( x );
 double reference_atan2(double x, double y)
 #if defined(_WIN32)
     // fix edge cases for Windows
-    if (isinf(x) && isinf(y))
-    {
+    if (isinf(x) && isinf(y)) {
         double retval = (y > 0) ? M_PI_4 : 3.f * M_PI_4;
         return (x > 0) ? retval : -retval;
@@ -5594,8 +5316,7 @@
 #if defined(_WIN32)
     // fix edge cases for Windows
-    if (isinf(x) && isinf(y))
-    {
+    if (isinf(x) && isinf(y)) {
         long double retval = (y > 0) ? M_PI_4 : 3.f * M_PI_4;
         return (x > 0) ? retval : -retval;
@@ -5605,7 +5326,7 @@
 double reference_frexp(double a, int *exp)
-    if (isnan(a) || isinf(a) || a == 0.0)
+    if(isnan(a) || isinf(a) || a == 0.0)
         *exp = 0;
         return a;
@@ -5623,7 +5344,7 @@
     u.l &= 0x7fffffffffffffffULL;
     int bias = -1022;
-    if ((u.l & 0x7ff0000000000000ULL) == 0)
+    if((u.l & 0x7ff0000000000000ULL) == 0)
         double d = u.l;
         u.d = d;
@@ -5642,13 +5363,13 @@
 long double reference_frexpl(long double a, int *exp)
-    if (isnan(a) || isinf(a) || a == 0.0)
+    if(isnan(a) || isinf(a) || a == 0.0)
         *exp = 0;
         return a;
-    if (sizeof(long double) == sizeof(double))
+    if(sizeof(long double) == sizeof(double))
         return reference_frexp(a, exp);
@@ -5659,64 +5380,90 @@
-double reference_atan(double x) { return atan(x); }
+double reference_atan(double x)
+    return atan( x );
-long double reference_atanl(long double x) { return atanl(x); }
+long double reference_atanl(long double x)
+    return atanl( x );
-long double reference_asinl(long double x) { return asinl(x); }
+long double reference_asinl(long double x)
+    return asinl( x );
-double reference_asin(double x) { return asin(x); }
+double reference_asin(double x)
+    return asin( x );
-double reference_relaxed_asin(double x) { return reference_asin(x); }
+double reference_fabs(double x)
+    return fabs( x);
-double reference_fabs(double x) { return fabs(x); }
-double reference_cosh(double x) { return cosh(x); }
+double reference_cosh(double x)
+    return cosh( x );
 long double reference_sqrtl(long double x)
-#if defined(__SSE2__)                                                          \
-    || (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)))
-    __m128d result128 = _mm_set_sd((double)x);
+#if defined( __SSE2__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64)))
+    __m128d result128 = _mm_set_sd((double) x);
     result128 = _mm_sqrt_sd(result128, result128);
     return _mm_cvtsd_f64(result128);
     volatile double dx = x;
-    return sqrt(dx);
+    return sqrt( dx );
-long double reference_tanhl(long double x) { return tanhl(x); }
+long double reference_tanhl(long double x)
+    return tanhl( x );
 long double reference_floorl(long double x)
-    if (x == 0.0 || reference_isinfl(x) || reference_isnanl(x)) return x;
+    if( x == 0.0 || reference_isinfl(x) || reference_isnanl(x) )
+        return x;
     long double absx = reference_fabsl(x);
-    if (absx >= HEX_LDBL(+, 1, 0, +, 52)) return x;
+    if( absx >= HEX_LDBL( +, 1, 0, +, 52 ) )
+        return x;
-    if (absx < 1.0)
+    if( absx < 1.0 )
-        if (x < 0.0)
+        if( x < 0.0 )
             return -1.0;
             return 0.0;
-    long double r = (long double)((cl_long)x);
+    long double r = (long double) ((cl_long) x);
-    if (x < 0.0 && r > x) r -= 1.0;
+    if( x < 0.0 && r > x )
+        r -= 1.0;
     return r;
-double reference_tanh(double x) { return tanh(x); }
+double reference_tanh(double x)
+    return tanh( x );
-long double reference_assignmentl(long double x) { return x; }
+long double reference_assignmentl( long double x ){ return x; }
-int reference_notl(long double x)
+int reference_notl( long double x )
     int r = !x;
     return r;
diff --git a/test_conformance/math_brute_force/reference_math.h b/test_conformance/math_brute_force/reference_math.h
index 78b2451..bcd0df8 100644
--- a/test_conformance/math_brute_force/reference_math.h
+++ b/test_conformance/math_brute_force/reference_math.h
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,221 +16,217 @@
-#if defined(__APPLE__)
-#include <OpenCL/opencl.h>
+#if defined( __APPLE__ )
+    #include <OpenCL/opencl.h>
-#include <CL/cl.h>
+    #include <CL/cl.h>
 // --  for testing float --
-double reference_sinh(double x);
-double reference_sqrt(double x);
-double reference_tanh(double x);
-double reference_acos(double);
-double reference_asin(double);
-double reference_atan(double);
-double reference_atan2(double, double);
-double reference_ceil(double);
-double reference_cosh(double);
-double reference_exp(double);
-double reference_fabs(double);
-double reference_acospi(double);
-double reference_asinpi(double);
-double reference_atanpi(double);
-double reference_atan2pi(double, double);
-double reference_cospi(double);
-double reference_divide(double, double);
-double reference_fract(double, double*);
-float reference_fma(float, float, float, int);
-double reference_mad(double, double, double);
-double reference_nextafter(double, double);
-double reference_recip(double);
-double reference_rootn(double, int);
-double reference_rsqrt(double);
-double reference_sincos(double, double*);
-double reference_sinpi(double);
-double reference_tanpi(double);
+double reference_sinh( double x );
+double reference_sqrt( double x );
+double reference_tanh( double x );
+double reference_acos( double );
+double reference_asin( double );
+double reference_atan( double );
+double reference_atan2( double, double );
+double reference_ceil( double );
+double reference_cosh( double );
+double reference_exp( double );
+double reference_fabs( double );
+double reference_acospi( double );
+double reference_asinpi( double );
+double reference_atanpi( double );
+double reference_atan2pi( double, double );
+double reference_cospi( double );
+double reference_divide( double, double );
+double reference_fract( double, double * );
+float  reference_fma( float, float, float, int );
+double reference_mad( double, double, double );
+double reference_nextafter(double, double );
+double reference_recip( double );
+double reference_rootn( double, int );
+double reference_rsqrt( double );
+double reference_sincos( double, double * );
+double reference_sinpi( double );
+double reference_tanpi( double );
 double reference_pow(double x, double y);
-double reference_pown(double, int);
-double reference_powr(double, double);
-double reference_cos(double);
-double reference_sin(double);
-double reference_tan(double);
-double reference_log(double);
-double reference_log10(double);
-double reference_modf(double, double* n);
+double reference_pown( double, int );
+double reference_powr( double, double );
+double reference_cos( double );
+double reference_sin( double );
+double reference_tan( double );
+double reference_log( double );
+double reference_log10( double );
+double reference_modf( double, double *n );
-double reference_fdim(double, double);
-double reference_add(double, double);
-double reference_subtract(double, double);
-double reference_divide(double, double);
-double reference_multiply(double, double);
-double reference_remquo(double, double, int*);
-double reference_lgamma_r(double, int*);
+double reference_fdim( double, double );
+double reference_add( double, double );
+double reference_subtract( double, double );
+double reference_divide( double, double );
+double reference_multiply( double, double );
+double reference_remquo( double, double, int* );
+double reference_lgamma_r( double, int* );
-int reference_isequal(double, double);
-int reference_isfinite(double);
-int reference_isgreater(double, double);
-int reference_isgreaterequal(double, double);
-int reference_isinf(double);
-int reference_isless(double, double);
-int reference_islessequal(double, double);
-int reference_islessgreater(double, double);
-int reference_isnan(double);
-int reference_isnormal(double);
-int reference_isnotequal(double, double);
-int reference_isordered(double, double);
-int reference_isunordered(double, double);
-int reference_signbit(float);
+int reference_isequal( double, double );
+int reference_isfinite( double );
+int reference_isgreater( double, double );
+int reference_isgreaterequal( double, double );
+int reference_isinf( double );
+int reference_isless( double, double );
+int reference_islessequal( double, double );
+int reference_islessgreater( double, double );
+int reference_isnan( double );
+int reference_isnormal( double );
+int reference_isnotequal( double, double );
+int reference_isordered( double, double );
+int reference_isunordered( double, double );
+int reference_signbit( float );
-double reference_acosh(double x);
-double reference_asinh(double x);
-double reference_atanh(double x);
+double reference_acosh( double x );
+double reference_asinh( double x );
+double reference_atanh( double x );
 double reference_cbrt(double x);
-float reference_copysign(float x, float y);
-double reference_copysignd(double x, double y);
-double reference_exp10(double);
-double reference_exp2(double x);
-double reference_expm1(double x);
-double reference_fmax(double x, double y);
-double reference_fmin(double x, double y);
-double reference_hypot(double x, double y);
-double reference_lgamma(double x);
-int reference_ilogb(double);
-double reference_log2(double x);
-double reference_log1p(double x);
-double reference_logb(double x);
-double reference_maxmag(double x, double y);
-double reference_minmag(double x, double y);
-double reference_nan(cl_uint x);
-double reference_reciprocal(double x);
-double reference_remainder(double x, double y);
-double reference_rint(double x);
-double reference_round(double x);
-double reference_trunc(double x);
-double reference_floor(double x);
-double reference_fmod(double x, double y);
-double reference_frexp(double x, int* n);
-double reference_ldexp(double x, int n);
+float reference_copysign( float x, float y);
+double reference_copysignd( double x, double y);
+double reference_exp10( double );
+double reference_exp2( double x );
+double reference_expm1( double x );
+double reference_fmax( double x, double y );
+double reference_fmin( double x, double y );
+double reference_hypot( double x, double y );
+double reference_lgamma( double x);
+int    reference_ilogb( double );
+double reference_log2( double x );
+double reference_log1p( double x );
+double reference_logb( double x );
+double reference_maxmag( double x, double y );
+double reference_minmag( double x, double y );
+double reference_nan( cl_uint x );
+double reference_reciprocal( double x );
+double reference_remainder( double x, double y );
+double reference_rint( double x );
+double reference_round( double x );
+double reference_trunc( double x );
+double reference_floor( double x );
+double reference_fmod( double x, double y );
+double reference_frexp( double x, int *n );
+double reference_ldexp( double x, int n );
-double reference_assignment(double x);
-int reference_not(double x);
+double reference_assignment( double x );
+int    reference_not( double x );
 // -- for testing fast-relaxed
-double reference_relaxed_acos(double);
-double reference_relaxed_asin(double);
-double reference_relaxed_atan(double);
-double reference_relaxed_mad(double, double, double);
-double reference_relaxed_divide(double x, double y);
-double reference_relaxed_sin(double x);
-double reference_relaxed_sinpi(double x);
-double reference_relaxed_cos(double x);
-double reference_relaxed_cospi(double x);
-double reference_relaxed_sincos(double x, double* y);
-double reference_relaxed_tan(double x);
-double reference_relaxed_exp(double x);
-double reference_relaxed_exp2(double x);
-double reference_relaxed_exp10(double x);
-double reference_relaxed_log(double x);
-double reference_relaxed_log2(double x);
-double reference_relaxed_log10(double x);
-double reference_relaxed_pow(double x, double y);
-double reference_relaxed_reciprocal(double x);
+double reference_relaxed_mad( double, double, double );
+double reference_relaxed_divide( double x, double y );
+double reference_relaxed_sin( double x );
+double reference_relaxed_cos( double x );
+double reference_relaxed_sincos( double x, double * y);
+double reference_relaxed_tan( double x );
+double reference_relaxed_exp( double x );
+double reference_relaxed_exp2( double x );
+double reference_relaxed_exp10( double x );
+double reference_relaxed_log( double x );
+double reference_relaxed_log2( double x );
+double reference_relaxed_pow( double x, double y);
+double reference_relaxed_reciprocal( double x );
 // -- for testing double --
-long double reference_sinhl(long double x);
-long double reference_sqrtl(long double x);
-long double reference_tanhl(long double x);
-long double reference_acosl(long double);
-long double reference_asinl(long double);
-long double reference_atanl(long double);
-long double reference_atan2l(long double, long double);
-long double reference_ceill(long double);
-long double reference_coshl(long double);
-long double reference_expl(long double);
-long double reference_fabsl(long double);
-long double reference_acospil(long double);
-long double reference_asinpil(long double);
-long double reference_atanpil(long double);
-long double reference_atan2pil(long double, long double);
-long double reference_cospil(long double);
-long double reference_dividel(long double, long double);
-long double reference_fractl(long double, long double*);
-long double reference_fmal(long double, long double, long double);
-long double reference_madl(long double, long double, long double);
-long double reference_nextafterl(long double, long double);
-long double reference_recipl(long double);
-long double reference_rootnl(long double, int);
-long double reference_rsqrtl(long double);
-long double reference_sincosl(long double, long double*);
-long double reference_sinpil(long double);
-long double reference_tanpil(long double);
+long double reference_sinhl( long double x );
+long double reference_sqrtl( long double x );
+long double reference_tanhl( long double x );
+long double reference_acosl( long double );
+long double reference_asinl( long double );
+long double reference_atanl( long double );
+long double reference_atan2l( long double, long double );
+long double reference_ceill( long double );
+long double reference_coshl( long double );
+long double reference_expl( long double );
+long double reference_fabsl( long double );
+long double reference_acospil( long double );
+long double reference_asinpil( long double );
+long double reference_atanpil( long double );
+long double reference_atan2pil( long double, long double );
+long double reference_cospil( long double );
+long double reference_dividel( long double, long double );
+long double reference_fractl( long double, long double * );
+long double reference_fmal( long double, long double, long double );
+long double reference_madl( long double, long double, long double );
+long double reference_nextafterl(long double, long double );
+long double reference_recipl( long double );
+long double reference_rootnl( long double, int );
+long double reference_rsqrtl( long double );
+long double reference_sincosl( long double, long double * );
+long double reference_sinpil( long double );
+long double reference_tanpil( long double );
 long double reference_powl(long double x, long double y);
-long double reference_pownl(long double, int);
-long double reference_powrl(long double, long double);
-long double reference_cosl(long double);
-long double reference_sinl(long double);
-long double reference_tanl(long double);
-long double reference_logl(long double);
-long double reference_log10l(long double);
-long double reference_modfl(long double, long double* n);
+long double reference_pownl( long double, int );
+long double reference_powrl( long double, long double );
+long double reference_cosl( long double );
+long double reference_sinl(long double );
+long double reference_tanl( long double );
+long double reference_logl( long double );
+long double reference_log10l( long double );
+long double reference_modfl( long double, long double *n );
-long double reference_fdiml(long double, long double);
-long double reference_addl(long double, long double);
-long double reference_subtractl(long double, long double);
-long double reference_dividel(long double, long double);
-long double reference_multiplyl(long double, long double);
-long double reference_remquol(long double, long double, int*);
-long double reference_lgamma_rl(long double, int*);
+long double reference_fdiml( long double, long double );
+long double reference_addl( long double, long double );
+long double reference_subtractl( long double, long double );
+long double reference_dividel( long double, long double );
+long double reference_multiplyl( long double, long double );
+long double reference_remquol( long double, long double, int* );
+long double reference_lgamma_rl( long double, int* );
-int reference_isequall(long double, long double);
-int reference_isfinitel(long double);
-int reference_isgreaterl(long double, long double);
-int reference_isgreaterequall(long double, long double);
-int reference_isinfl(long double);
-int reference_islessl(long double, long double);
-int reference_islessequall(long double, long double);
-int reference_islessgreaterl(long double, long double);
-int reference_isnanl(long double);
-int reference_isnormall(long double);
-int reference_isnotequall(long double, long double);
-int reference_isorderedl(long double, long double);
-int reference_isunorderedl(long double, long double);
-int reference_signbitl(long double);
+int reference_isequall( long double, long double );
+int reference_isfinitel( long double );
+int reference_isgreaterl( long double, long double );
+int reference_isgreaterequall( long double, long double );
+int reference_isinfl( long double );
+int reference_islessl( long double, long double );
+int reference_islessequall( long double, long double );
+int reference_islessgreaterl( long double, long double );
+int reference_isnanl( long double );
+int reference_isnormall( long double );
+int reference_isnotequall( long double, long double );
+int reference_isorderedl( long double, long double );
+int reference_isunorderedl( long double, long double );
+int reference_signbitl( long double );
-long double reference_acoshl(long double x);
-long double reference_asinhl(long double x);
-long double reference_atanhl(long double x);
+long double reference_acoshl( long double x );
+long double reference_asinhl( long double x );
+long double reference_atanhl( long double x );
 long double reference_cbrtl(long double x);
-long double reference_copysignl(long double x, long double y);
-long double reference_exp10l(long double);
-long double reference_exp2l(long double x);
-long double reference_expm1l(long double x);
-long double reference_fmaxl(long double x, long double y);
-long double reference_fminl(long double x, long double y);
-long double reference_hypotl(long double x, long double y);
-long double reference_lgammal(long double x);
-int reference_ilogbl(long double);
-long double reference_log2l(long double x);
-long double reference_log1pl(long double x);
-long double reference_logbl(long double x);
-long double reference_maxmagl(long double x, long double y);
-long double reference_minmagl(long double x, long double y);
-long double reference_nanl(cl_ulong x);
-long double reference_reciprocall(long double x);
-long double reference_remainderl(long double x, long double y);
-long double reference_rintl(long double x);
-long double reference_roundl(long double x);
-long double reference_truncl(long double x);
-long double reference_floorl(long double x);
-long double reference_fmodl(long double x, long double y);
-long double reference_frexpl(long double x, int* n);
-long double reference_ldexpl(long double x, int n);
+long double reference_copysignl( long double x, long double y);
+long double reference_exp10l( long double );
+long double reference_exp2l( long double x );
+long double reference_expm1l( long double x );
+long double reference_fmaxl( long double x, long double y );
+long double reference_fminl( long double x, long double y );
+long double reference_hypotl( long double x, long double y );
+long double reference_lgammal( long double x);
+int    reference_ilogbl( long double );
+long double reference_log2l( long double x );
+long double reference_log1pl( long double x );
+long double reference_logbl( long double x );
+long double reference_maxmagl( long double x, long double y );
+long double reference_minmagl( long double x, long double y );
+long double reference_nanl( cl_ulong x );
+long double reference_reciprocall( long double x );
+long double reference_remainderl( long double x, long double y );
+long double reference_rintl( long double x );
+long double reference_roundl( long double x );
+long double reference_truncl( long double x );
+long double reference_floorl( long double x );
+long double reference_fmodl( long double x, long double y );
+long double reference_frexpl( long double x, int *n );
+long double reference_ldexpl( long double x, int n );
-long double reference_assignmentl(long double x);
-int reference_notl(long double x);
+long double reference_assignmentl( long double x );
+int reference_notl( long double x );
diff --git a/test_conformance/math_brute_force/sleep.cpp b/test_conformance/math_brute_force/sleep.cpp
deleted file mode 100644
index c7b1243..0000000
--- a/test_conformance/math_brute_force/sleep.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "sleep.h"
-#include "utility.h"
-#if defined(__APPLE__)
-#include <IOKit/pwr_mgt/IOPMLib.h>
-#include <IOKit/IOMessage.h>
-    io_connect_t connection;
-    IONotificationPortRef port;
-    io_object_t iterator;
-} sleepInfo;
-void sleepCallback(void* refcon, io_service_t service, natural_t messageType,
-                   void* messageArgument);
-void sleepCallback(void* refcon UNUSED, io_service_t service UNUSED,
-                   natural_t messageType, void* messageArgument)
-    IOReturn result;
-    /*
-    service -- The IOService whose state has changed.
-    messageType -- A messageType enum, defined by IOKit/IOMessage.h or by the
-    IOService's family. messageArgument -- An argument for the message,
-    dependent on the messageType.
-    */
-    switch (messageType)
-    {
-        case kIOMessageSystemWillSleep:
-            // Handle demand sleep (such as sleep caused by running out of
-            // batteries, closing the lid of a laptop, or selecting
-            // sleep from the Apple menu.
-            IOAllowPowerChange(sleepInfo.connection, (long)messageArgument);
-            vlog("Hard sleep occurred.\n");
-            break;
-        case kIOMessageCanSystemSleep:
-            // In this case, the computer has been idle for several minutes
-            // and will sleep soon so you must either allow or cancel
-            // this notification. Important: if you don’t respond, there will
-            // be a 30-second timeout before the computer sleeps.
-            // IOCancelPowerChange(root_port,(long)messageArgument);
-            result = IOCancelPowerChange(sleepInfo.connection,
-                                         (long)messageArgument);
-            if (kIOReturnSuccess != result)
-                vlog("sleep prevention failed. (%d)\n", result);
-            break;
-        case kIOMessageSystemHasPoweredOn:
-            // Handle wakeup.
-            break;
-    }
-void PreventSleep(void)
-#if defined(__APPLE__)
-    vlog("Disabling sleep... ");
-    sleepInfo.iterator = (io_object_t)0;
-    sleepInfo.port = NULL;
-    sleepInfo.connection = IORegisterForSystemPower(
-        &sleepInfo, // void * refcon,
-        &sleepInfo.port, // IONotificationPortRef * thePortRef,
-        sleepCallback, // IOServiceInterestCallback callback,
-        &sleepInfo.iterator // io_object_t * notifier
-    );
-    if ((io_connect_t)0 == sleepInfo.connection)
-        vlog("failed.\n");
-    else
-        vlog("done.\n");
-    CFRunLoopAddSource(CFRunLoopGetCurrent(),
-                       IONotificationPortGetRunLoopSource(sleepInfo.port),
-                       kCFRunLoopDefaultMode);
-    vlog("*** PreventSleep() is not implemented on this platform.\n");
-void ResumeSleep(void)
-#if defined(__APPLE__)
-    IOReturn result = IODeregisterForSystemPower(&sleepInfo.iterator);
-    if (0 != result)
-        vlog("Got error %d restoring sleep \n", result);
-    else
-        vlog("Sleep restored.\n");
-    vlog("*** ResumeSleep() is not implemented on this platform.\n");
diff --git a/test_conformance/math_brute_force/sleep.h b/test_conformance/math_brute_force/sleep.h
deleted file mode 100644
index ca64395..0000000
--- a/test_conformance/math_brute_force/sleep.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef SLEEP_H
-#define SLEEP_H
-void PreventSleep(void);
-void ResumeSleep(void);
-#endif /* SLEEP_H */
diff --git a/test_conformance/math_brute_force/ternary.cpp b/test_conformance/math_brute_force/ternary.cpp
new file mode 100644
index 0000000..1bd7d88
--- /dev/null
+++ b/test_conformance/math_brute_force/ternary.cpp
@@ -0,0 +1,1359 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "Utility.h"
+#include <string.h>
+#include "FunctionList.h"
+#define FLUSHED 1
+int TestFunc_Float_Float_Float_Float(const Func *f, MTdata); // float test entry point; its definition follows later in this file
+int TestFunc_Double_Double_Double_Double(const Func *f, MTdata); // double test entry point; definition is outside this excerpt
+extern const vtbl _ternary = { "ternary", TestFunc_Float_Float_Float_Float, // table labelled ternary that bundles the two test entry points declared above
+                               TestFunc_Double_Double_Double_Double };
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); // forward declaration of the float kernel builder
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); // forward declaration of the double kernel builder
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) // assembles float kernel source calling name on three inputs and passes it to MakeKernel; vectorSize indexes sizeNames/sizeValues
+    const char *c[] = { // generic source: each work-item computes out[i] = name( in1[i], in2[i], in3[i] )
+        "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2,  __global float", sizeNames[vectorSize], "* in3 )\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ", name, "( in1[i], in2[i], in3[i] );\n"
+        "}\n"
+    }; // c3 below is the 3-wide source: scalar float pointers, vload3/vstore3 at 3*i, and a scalar path for the last work-item
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global float* in2 , __global float* in3)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       float3 f0 = vload3( 0, in + 3 * i );\n"
+        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
+        "       float3 f2 = vload3( 0, in3 + 3 * i );\n"
+        "       f0 = ", name, "( f0, f1, f2 );\n"
+        "       vstore3( f0, 0, out + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       float3 f0, f1, f2;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
+        "               f2 = (float3)( in3[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        "               f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       f0 = ", name, "( f0, f1, f2 );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = f0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = f0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+    const char **kern = c; // start with the generic source fragments
+    size_t kernSize = sizeof(c)/sizeof(c[0]); // number of string fragments, not bytes
+    if( sizeValues[vectorSize] == 3 ) // 3-element vectors switch to the vload3/vstore3 source
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32]; // kernel entry-point name: math_kernel followed by the size suffix
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); // size limit passed is one less than the buffer size
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); // result of MakeKernel is returned unchanged; presumably it fills *k and *p - confirm against MakeKernel
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) // double-precision counterpart of BuildKernel: same structure, source starts with the cl_khr_fp64 pragma
+    const char *c[] = { // generic source: each work-item computes out[i] = name( in1[i], in2[i], in3[i] )
+        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2,  __global double", sizeNames[vectorSize], "* in3 )\n"
+        "{\n"
+        "   int i = get_global_id(0);\n"
+        "   out[i] = ", name, "( in1[i], in2[i], in3[i] );\n"
+        "}\n"
+    }; // c3 below is the 3-wide source: scalar double pointers, vload3/vstore3 at 3*i, and a scalar path for the last work-item
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2 , __global double* in3)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   if( i + 1 < get_global_size(0) )\n"
+        "   {\n"
+        "       double3 d0 = vload3( 0, in + 3 * i );\n"
+        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
+        "       double3 d2 = vload3( 0, in3 + 3 * i );\n"
+        "       d0 = ", name, "( d0, d1, d2 );\n"
+        "       vstore3( d0, 0, out + 3*i );\n"
+        "   }\n"
+        "   else\n"
+        "   {\n"
+        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+        "       double3 d0, d1, d2;\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 1:\n"
+        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
+        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
+        "               d2 = (double3)( in3[3*i], NAN, NAN ); \n"
+        "               break;\n"
+        "           case 0:\n"
+        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
+        "               d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
+        "               break;\n"
+        "       }\n"
+        "       d0 = ", name, "( d0, d1, d2 );\n"
+        "       switch( parity )\n"
+        "       {\n"
+        "           case 0:\n"
+        "               out[3*i+1] = d0.y; \n"
+        "               // fall through\n"
+        "           case 1:\n"
+        "               out[3*i] = d0.x; \n"
+        "               break;\n"
+        "       }\n"
+        "   }\n"
+        "}\n"
+    };
+    const char **kern = c; // start with the generic source fragments
+    size_t kernSize = sizeof(c)/sizeof(c[0]); // number of string fragments, not bytes
+    if( sizeValues[vectorSize] == 3 ) // 3-element vectors switch to the vload3/vstore3 source
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32]; // kernel entry-point name: math_kernel followed by the size suffix
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); // size limit passed is one less than the buffer size
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); // result of MakeKernel is returned unchanged; presumably it fills *k and *p - confirm against MakeKernel
+typedef struct BuildKernelInfo // argument bundle that the BuildKernel_FloatFn / BuildKernel_DoubleFn callbacks receive through their void* parameter
+    cl_uint     offset;            // the first vector size to build; each callback adds its job_id to this
+    cl_kernel   *kernels;          // kernel array; callbacks pass the address of slot offset + job_id
+    cl_program  *programs;         // program array, indexed the same way as kernels
+    const char  *nameInCode;       // forwarded as the name argument of BuildKernel / BuildKernelDouble
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); // forward declaration
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) // per-job callback passed to ThreadPool_Do by the float test; p is treated as a BuildKernelInfo*
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id; // vector-size index handled by this job
+    return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); // targets slot i of both arrays and returns BuildKernel's status
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); // forward declaration
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) // double counterpart of BuildKernel_FloatFn; p is treated as a BuildKernelInfo*
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id; // vector-size index handled by this job
+    return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); // targets slot i of both arrays and returns BuildKernelDouble's status
+// A table of more difficult cases to get right; the float test cycles through every triple of these entries for its first buffer
+static const float specialValuesFloat[] = {
+    -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),  MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), // negative NaN, infinity, FLT_MAX, then values just above, at and below 2^64 and 2^63 in magnitude
+    -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.75f, -1.5f, -1.25f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24), MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), MAKE_HEX_FLOAT(-0x1.003p0f, -0x1003000L, -24), -MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), // negative values with magnitude from 3 down to just below 1
+    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), // negative values around FLT_MIN, then values below FLT_MIN in magnitude
+    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, // smallest negative magnitudes, ending with negative zero
+    +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), // positive counterparts of the first row
+    +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.75f, 1.5f, 1.25f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), MAKE_HEX_FLOAT(0x1.003p0f, 0x1003000L, -24), +MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), // positive values from 3 down to just below 1
+    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), // positive values around FLT_MIN, then values below FLT_MIN
+    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f // smallest positive magnitudes, ending with positive zero
+static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] ); // number of entries in specialValuesFloat
+int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d)
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    float maxErrorVal = 0.0f;
+    float maxErrorVal2 = 0.0f;
+    float maxErrorVal3 = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( float );
+    int skipNanInf = (0 == strcmp( "fma", f->nameInCode )) && ! gInfNanSupport;
+    cl_uchar overflow[BUFFER_SIZE / sizeof( float )];
+    float float_ulps;
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    if( gIsEmbedded )
+        float_ulps = f->float_embedded_ulps;
+    else
+        float_ulps = f->float_ulps;
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+        return error;
+    /*
+     for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+     if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+     return error;
+     */
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        uint32_t *p3 = (uint32_t *)gIn3;
+        j = 0;
+        if( i == 0 )
+        { // test edge cases
+            float *fp = (float *)gIn;
+            float *fp2 = (float *)gIn2;
+            float *fp3 = (float *)gIn3;
+            uint32_t x, y, z;  x = y = z = 0;
+            for( ; j < bufferSize / sizeof( float ); j++ )
+            {
+                fp[j] = specialValuesFloat[x];
+                fp2[j] = specialValuesFloat[y];
+                fp3[j] = specialValuesFloat[z];
+                if( ++x >= specialValuesFloatCount )
+                {
+                    x = 0;
+                    if( ++y >= specialValuesFloatCount )
+                    {
+                        y = 0;
+                        if( ++z >= specialValuesFloatCount )
+                            break;
+                    }
+                }
+            }
+            if( j == bufferSize / sizeof( float ) )
+                vlog_error( "Test Error: not all special cases tested!\n" );
+        }
+        for( ; j < bufferSize / sizeof( float ); j++ )
+        {
+            p[j] = genrand_int32(d);
+            p2[j] = genrand_int32(d);
+            p3[j] = genrand_int32(d);
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            return error;
+        }
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+        //Calculate the correctly rounded reference result
+        float *r = (float *)gOut_Ref;
+        float *s = (float *)gIn;
+        float *s2 = (float *)gIn2;
+        float *s3 = (float *)gIn3;
+        if( skipNanInf )
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            {
+                feclearexcept(FE_OVERFLOW);
+                r[j] = (float) f->func.f_fma( s[j], s2[j], s3[j], CORRECTLY_ROUNDED );
+                overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+            }
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                r[j] = (float) f->func.f_fma( s[j], s2[j], s3[j], CORRECTLY_ROUNDED );
+        }
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+        if( gSkipCorrectnessTesting )
+            break;
+        //Verify data
+        uint32_t *t = (uint32_t *)gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = (uint32_t *)(gOut[k]);
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] )
+                {
+                    float err;
+                    int fail;
+                    float test = ((float*) q)[j];
+                    float correct = f->func.f_fma( s[j], s2[j], s3[j], CORRECTLY_ROUNDED );
+                    // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                    if( skipNanInf )
+                    {
+                        if( overflow[j]                                         ||
+                           IsFloatInfinity(correct) || IsFloatNaN(correct)     ||
+                           IsFloatInfinity(s[j])    || IsFloatNaN(s[j])        ||
+                           IsFloatInfinity(s2[j])   || IsFloatNaN(s2[j])       ||
+                           IsFloatInfinity(s3[j])   || IsFloatNaN(s3[j])       )
+                            continue;
+                    }
+                    err = Ulp_Error( test, correct );
+                    fail = ! (fabsf(err) <= float_ulps);
+                    if( fail && ftz )
+                    {
+                        float correct2, err2;
+                        // retry per section  with flushing on
+                        if( 0.0f == test && 0.0f == f->func.f_fma( s[j], s2[j], s3[j], FLUSHED ) )
+                        {
+                            fail = 0;
+                            err = 0.0f;
+                        }
+                        // retry per section
+                        if( fail && IsFloatSubnormal( s[j] ) )
+                        { // look at me,
+                            float err3, correct3;
+                            if( skipNanInf )
+                                feclearexcept( FE_OVERFLOW );
+                            correct2 = f->func.f_fma( 0.0f, s2[j], s3[j], CORRECTLY_ROUNDED );
+                            correct3 = f->func.f_fma( -0.0f, s2[j], s3[j], CORRECTLY_ROUNDED );
+                            if( skipNanInf )
+                            {
+                                if( fetestexcept( FE_OVERFLOW ) )
+                                    continue;
+                                // Note: no double rounding here.  Reference functions calculate in single precision.
+                                if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
+                                   IsFloatInfinity(correct3) || IsFloatNaN(correct3)   )
+                                    continue;
+                            }
+                            err2 = Ulp_Error( test, correct2  );
+                            err3 = Ulp_Error( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( 0.0f == test &&
+                                ( 0.0f == f->func.f_fma(  0.0f, s2[j], s3[j], FLUSHED )  ||
+                                  0.0f == f->func.f_fma( -0.0f, s2[j], s3[j], FLUSHED ) )
+                              )
+                            {
+                                fail = 0;
+                                err = 0.0f;
+                            }
+                            //try with first two args as zero
+                            if( IsFloatSubnormal( s2[j] ) )
+                            { // its fun to have fun,
+                                double correct4, correct5;
+                                float err4, err5;
+                                if( skipNanInf )
+                                    feclearexcept( FE_OVERFLOW );
+                                correct2 = f->func.f_fma( 0.0f, 0.0f, s3[j], CORRECTLY_ROUNDED );
+                                correct3 = f->func.f_fma( -0.0f, 0.0f, s3[j], CORRECTLY_ROUNDED );
+                                correct4 = f->func.f_fma( 0.0f, -0.0f, s3[j], CORRECTLY_ROUNDED );
+                                correct5 = f->func.f_fma( -0.0f, -0.0f, s3[j], CORRECTLY_ROUNDED );
+                                // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                                if( !gInfNanSupport )
+                                {
+                                    if( fetestexcept(FE_OVERFLOW) )
+                                        continue;
+                                    // Note: no double rounding here.  Reference functions calculate in single precision.
+                                    if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
+                                       IsFloatInfinity(correct3) || IsFloatNaN(correct3) ||
+                                       IsFloatInfinity(correct4) || IsFloatNaN(correct4) ||
+                                       IsFloatInfinity(correct5) || IsFloatNaN(correct5) )
+                                        continue;
+                                }
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                err4 = Ulp_Error( test, correct4  );
+                                err5 = Ulp_Error( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)) &&
+                                                 (!(fabsf(err4) <= float_ulps)) && (!(fabsf(err5) <= float_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                // retry per section
+                                if( 0.0f == test &&
+                                    ( 0.0f == f->func.f_fma(  0.0f,  0.0f, s3[j], FLUSHED )  ||
+                                      0.0f == f->func.f_fma( -0.0f,  0.0f, s3[j], FLUSHED )  ||
+                                      0.0f == f->func.f_fma(  0.0f, -0.0f, s3[j], FLUSHED )  ||
+                                      0.0f == f->func.f_fma( -0.0f, -0.0f, s3[j], FLUSHED )  )
+                                )
+                                {
+                                    fail = 0;
+                                    err = 0.0f;
+                                }
+                                if( IsFloatSubnormal( s3[j] )  )
+                                {
+                                    if( test == 0.0f )  // 0*0+0 is 0
+                                    {
+                                        fail = 0;
+                                        err = 0.0f;
+                                    }
+                                }
+                            }
+                            else if( IsFloatSubnormal( s3[j] ) )
+                            {
+                                double correct4, correct5;
+                                float err4, err5;
+                                if( skipNanInf )
+                                    feclearexcept( FE_OVERFLOW );
+                                correct2 = f->func.f_fma( 0.0f, s2[j], 0.0f, CORRECTLY_ROUNDED );
+                                correct3 = f->func.f_fma( -0.0f, s2[j], 0.0f, CORRECTLY_ROUNDED );
+                                correct4 = f->func.f_fma( 0.0f,  s2[j], -0.0f, CORRECTLY_ROUNDED );
+                                correct5 = f->func.f_fma( -0.0f, s2[j], -0.0f, CORRECTLY_ROUNDED );
+                                // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
+                                if( !gInfNanSupport )
+                                {
+                                    if( fetestexcept(FE_OVERFLOW) )
+                                        continue;
+                                    // Note: no double rounding here.  Reference functions calculate in single precision.
+                                    if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
+                                       IsFloatInfinity(correct3) || IsFloatNaN(correct3) ||
+                                       IsFloatInfinity(correct4) || IsFloatNaN(correct4) ||
+                                       IsFloatInfinity(correct5) || IsFloatNaN(correct5) )
+                                        continue;
+                                }
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                err4 = Ulp_Error( test, correct4  );
+                                err5 = Ulp_Error( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)) &&
+                                                 (!(fabsf(err4) <= float_ulps)) && (!(fabsf(err5) <= float_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                // retry per section
+                                if( 0.0f == test &&
+                                    (   0.0f == f->func.f_fma( 0.0f, s2[j], 0.0f, FLUSHED )   ||
+                                        0.0f == f->func.f_fma(-0.0f, s2[j], 0.0f, FLUSHED )   ||
+                                        0.0f == f->func.f_fma( 0.0f, s2[j],-0.0f, FLUSHED )   ||
+                                        0.0f == f->func.f_fma(-0.0f, s2[j],-0.0f, FLUSHED )   )
+                                )
+                                {
+                                    fail = 0;
+                                    err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsFloatSubnormal( s2[j] ) )
+                        {
+                            double correct2, correct3;
+                            float err2, err3;
+                            if( skipNanInf )
+                                feclearexcept( FE_OVERFLOW );
+                            correct2 = f->func.f_fma( s[j], 0.0f, s3[j], CORRECTLY_ROUNDED );
+                            correct3 = f->func.f_fma( s[j], -0.0f, s3[j], CORRECTLY_ROUNDED );
+                            if( skipNanInf )
+                            {
+                                if( fetestexcept( FE_OVERFLOW ) )
+                                    continue;
+                                // Note: no double rounding here.  Reference functions calculate in single precision.
+                                if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
+                                   IsFloatInfinity(correct3) || IsFloatNaN(correct3)   )
+                                    continue;
+                            }
+                            err2 = Ulp_Error( test, correct2  );
+                            err3 = Ulp_Error( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( 0.0f == test &&
+                                (   0.0f == f->func.f_fma( s[j], 0.0f, s3[j], FLUSHED )  ||
+                                    0.0f == f->func.f_fma( s[j], -0.0f, s3[j], FLUSHED ) )
+                            )
+                            {
+                                fail = 0;
+                                err = 0.0f;
+                            }
+                            //try with second two args as zero
+                            if( IsFloatSubnormal( s3[j] ) )
+                            {
+                                double correct4, correct5;
+                                float err4, err5;
+                                if( skipNanInf )
+                                    feclearexcept( FE_OVERFLOW );
+                                correct2 = f->func.f_fma( s[j], 0.0f, 0.0f, CORRECTLY_ROUNDED );
+                                correct3 = f->func.f_fma( s[j], -0.0f, 0.0f, CORRECTLY_ROUNDED );
+                                correct4 = f->func.f_fma( s[j], 0.0f, -0.0f, CORRECTLY_ROUNDED );
+                                correct5 = f->func.f_fma( s[j], -0.0f, -0.0f, CORRECTLY_ROUNDED );
+                                // Per section 10 paragraph 6, accept any result if an input or output is an infinity or NaN, or on overflow
+                                if( !gInfNanSupport )
+                                {
+                                    if( fetestexcept(FE_OVERFLOW) )
+                                        continue;
+                                    // Note: no double rounding here.  Reference functions calculate in single precision.
+                                    if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
+                                       IsFloatInfinity(correct3) || IsFloatNaN(correct3) ||
+                                       IsFloatInfinity(correct4) || IsFloatNaN(correct4) ||
+                                       IsFloatInfinity(correct5) || IsFloatNaN(correct5) )
+                                        continue;
+                                }
+                                err2 = Ulp_Error( test, correct2  );
+                                err3 = Ulp_Error( test, correct3  );
+                                err4 = Ulp_Error( test, correct4  );
+                                err5 = Ulp_Error( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)) &&
+                                                 (!(fabsf(err4) <= float_ulps)) && (!(fabsf(err5) <= float_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                // retry per section
+                                if( 0.0f == test &&
+                                    (   0.0f == f->func.f_fma( s[j], 0.0f, 0.0f, FLUSHED )    ||
+                                        0.0f == f->func.f_fma( s[j],-0.0f, 0.0f, FLUSHED )    ||
+                                        0.0f == f->func.f_fma( s[j], 0.0f,-0.0f, FLUSHED )    ||
+                                        0.0f == f->func.f_fma( s[j],-0.0f,-0.0f, FLUSHED )    )
+                                )
+                                {
+                                    fail = 0;
+                                    err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsFloatSubnormal(s3[j]) )
+                        {
+                            double correct2, correct3;
+                            float err2, err3;
+                            if( skipNanInf )
+                                feclearexcept( FE_OVERFLOW );
+                            correct2 = f->func.f_fma( s[j], s2[j], 0.0f, CORRECTLY_ROUNDED );
+                            correct3 = f->func.f_fma( s[j], s2[j], -0.0f, CORRECTLY_ROUNDED );
+                            if( skipNanInf )
+                            {
+                                if( fetestexcept( FE_OVERFLOW ) )
+                                    continue;
+                                // Note: no double rounding here.  Reference functions calculate in single precision.
+                                if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
+                                   IsFloatInfinity(correct3) || IsFloatNaN(correct3)   )
+                                    continue;
+                            }
+                            err2 = Ulp_Error( test, correct2  );
+                            err3 = Ulp_Error( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( 0.0f == test &&
+                                (   0.0f == f->func.f_fma( s[j], s2[j], 0.0f, FLUSHED ) ||
+                                    0.0f == f->func.f_fma( s[j], s2[j],-0.0f, FLUSHED )  )
+                            )
+                            {
+                                fail = 0;
+                                err = 0.0f;
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                        maxErrorVal2 = s2[j];
+                        maxErrorVal3 = s3[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a, %a} ({0x%8.8x, 0x%8.8x, 0x%8.8x}): *%a vs. %a\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((cl_uint*)s)[j], ((cl_uint*)s2)[j], ((cl_uint*)s3)[j],  ((float*) gOut_Ref)[j], test );
+                        error = -1;
+                        goto exit;
+                    }
+                }
+            }
+        }
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               vlog("base:%14u step:%10u bufferSize:%10zd \n", i, step,  bufferSize);
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        uint32_t *p2 = (uint32_t *)gIn2;
+        uint32_t *p3 = (uint32_t *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            p[j] = genrand_int32(d);
+            p2[j] = genrand_int32(d);
+            p3[j] = genrand_int32(d);
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_float ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 );
+    vlog( "\n" );
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+    return error;
+// A table of more difficult cases to get right.
+//
+// TestFunc_Double_Double_Double_Double uses these values as edge-case inputs on
+// its first pass (i == 0): it cycles three indices through the table to fill the
+// three input arrays with combinations of table entries.
+//
+// Layout: the first half holds negative values (NaN, infinity, DBL_MAX, values
+// around 2^64 and 2^63, values around 3, 2.5, 2, 1.5 and 1, values around
+// DBL_MIN, subnormals, and zero); the second half repeats the same entries with
+// the sign flipped. Keep the two halves in step when adding entries.
+//
+// NOTE(review): MAKE_HEX_DOUBLE is defined elsewhere; presumably it evaluates to
+// the hex-float constant given as its first argument -- confirm before editing.
+static const double specialValuesDouble[] = {
+    -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
+    -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
+    // Positive mirror of the entries above.
+    +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),  MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
+    +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
+// Number of entries in specialValuesDouble.
+static const size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] );
+int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+    int ftz = f->ftz || gForceFTZ;
+    double maxErrorVal = 0.0f;
+    double maxErrorVal2 = 0.0f;
+    double maxErrorVal3 = 0.0f;
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( double );
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    Force64BitFPUPrecision();
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_DoubleFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info ) ))
+    {
+        return error;
+    }
+    /*
+     for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+     if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+     return error;
+     */
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        double *p2 = (double *)gIn2;
+        double *p3 = (double *)gIn3;
+        j = 0;
+        if( i == 0 )
+        { // test edge cases
+            uint32_t x, y, z;  x = y = z = 0;
+            for( ; j < bufferSize / sizeof( double ); j++ )
+            {
+                p[j] = specialValuesDouble[x];
+                p2[j] = specialValuesDouble[y];
+                p3[j] = specialValuesDouble[z];
+                if( ++x >= specialValuesDoubleCount )
+                {
+                    x = 0;
+                    if( ++y >= specialValuesDoubleCount )
+                    {
+                        y = 0;
+                        if( ++z >= specialValuesDoubleCount )
+                            break;
+                    }
+                }
+            }
+            if( j == bufferSize / sizeof( double ) )
+                vlog_error( "Test Error: not all special cases tested!\n" );
+        }
+        for( ; j < bufferSize / sizeof( double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = DoubleFromUInt32(genrand_int32(d));
+            p3[j] = DoubleFromUInt32(genrand_int32(d));
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            return error;
+        }
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+        //Calculate the correctly rounded reference result
+        double *r = (double *)gOut_Ref;
+        double *s = (double *)gIn;
+        double *s2 = (double *)gIn2;
+        double *s3 = (double *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+            r[j] = (double) f->dfunc.f_fff( s[j], s2[j], s3[j] );
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+        if( gSkipCorrectnessTesting )
+            break;
+        //Verify data
+        uint64_t *t = (uint64_t *)gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint64_t *q = (uint64_t *)(gOut[k]);
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] )
+                {
+                    double test = ((double*) q)[j];
+                    long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
+                    int fail = ! (fabsf(err) <= f->double_ulps);
+                    if( fail && ftz )
+                    {
+                        // retry per section
+                        if( IsDoubleSubnormal(correct) )
+                        { // look at me,
+                            fail = fail && ( test != 0.0f );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                        // retry per section
+                        if( fail && IsDoubleSubnormal( s[j] ) )
+                        { // look at me,
+                            long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] );
+                            long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            { // look at me now,
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                            //try with first two args as zero
+                            if( IsDoubleSubnormal( s2[j] ) )
+                            { // its fun to have fun,
+                                correct2 = f->dfunc.f_fff( 0.0, 0.0, s3[j] );
+                                correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] );
+                                long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] );
+                                long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                // retry per section
+                                if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ||
+                                   IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                                if( IsDoubleSubnormal( s3[j] )  )
+                                { // but you have to know how!
+                                    correct2 = f->dfunc.f_fff( 0.0, 0.0, 0.0f );
+                                    correct3 = f->dfunc.f_fff( -0.0, 0.0, 0.0f );
+                                    correct4 = f->dfunc.f_fff( 0.0, -0.0, 0.0f );
+                                    correct5 = f->dfunc.f_fff( -0.0, -0.0, 0.0f );
+                                    long double correct6 = f->dfunc.f_fff( 0.0, 0.0, -0.0f );
+                                    long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f );
+                                    long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f );
+                                    long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f );
+                                    err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                    err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                    err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                    err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                    float err6 = Bruteforce_Ulp_Error_Double( test, correct6  );
+                                    float err7 = Bruteforce_Ulp_Error_Double( test, correct7  );
+                                    float err8 = Bruteforce_Ulp_Error_Double( test, correct8  );
+                                    float err9 = Bruteforce_Ulp_Error_Double( test, correct9  );
+                                    fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                     (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) &&
+                                                     (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) &&
+                                                     (!(fabsf(err7) <= f->double_ulps)) && (!(fabsf(err8) <= f->double_ulps)));
+                                    if( fabsf( err2 ) < fabsf(err ) )
+                                        err = err2;
+                                    if( fabsf( err3 ) < fabsf(err ) )
+                                        err = err3;
+                                    if( fabsf( err4 ) < fabsf(err ) )
+                                        err = err4;
+                                    if( fabsf( err5 ) < fabsf(err ) )
+                                        err = err5;
+                                    if( fabsf( err6 ) < fabsf(err ) )
+                                        err = err6;
+                                    if( fabsf( err7 ) < fabsf(err ) )
+                                        err = err7;
+                                    if( fabsf( err8 ) < fabsf(err ) )
+                                        err = err8;
+                                    if( fabsf( err9 ) < fabsf(err ) )
+                                        err = err9;
+                                    // retry per section
+                                    if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps )  ||
+                                       IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps )  ||
+                                       IsDoubleResultSubnormal( correct6, f->double_ulps ) || IsDoubleResultSubnormal( correct7, f->double_ulps )  ||
+                                       IsDoubleResultSubnormal( correct8, f->double_ulps ) || IsDoubleResultSubnormal( correct9, f->double_ulps )  )
+                                    {
+                                        fail = fail && ( test != 0.0f);
+                                        if( ! fail )
+                                            err = 0.0f;
+                                    }
+                                }
+                            }
+                            else if( IsDoubleSubnormal( s3[j] ) )
+                            {
+                                correct2 = f->dfunc.f_fff( 0.0, s2[j], 0.0 );
+                                correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 );
+                                long double correct4 = f->dfunc.f_fff( 0.0,  s2[j], -0.0 );
+                                long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                // retry per section
+                                if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps )  ||
+                                   IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsDoubleSubnormal( s2[j] ) )
+                        {
+                            long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] );
+                            long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps )  || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                            //try with second two args as zero
+                            if( IsDoubleSubnormal( s3[j] ) )
+                            {
+                                correct2 = f->dfunc.f_fff( s[j], 0.0, 0.0 );
+                                correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 );
+                                long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 );
+                                long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
+                                                 (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
+                                if( fabsf( err2 ) < fabsf(err ) )
+                                    err = err2;
+                                if( fabsf( err3 ) < fabsf(err ) )
+                                    err = err3;
+                                if( fabsf( err4 ) < fabsf(err ) )
+                                    err = err4;
+                                if( fabsf( err5 ) < fabsf(err ) )
+                                    err = err5;
+                                // retry per section
+                                if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ||
+                                   IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) )
+                                {
+                                    fail = fail && ( test != 0.0f);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                        }
+                        else if( fail && IsDoubleSubnormal(s3[j]) )
+                        {
+                            long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 );
+                            long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                        maxErrorVal2 = s2[j];
+                        maxErrorVal3 = s3[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %sD%s: %f ulp error at {%.13la, %.13la, %.13la}: *%.13la vs. %.13la\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((double*) gOut_Ref)[j], test );
+                        error = -1;
+                        goto exit;
+                    }
+                }
+            }
+        }
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        double *p2 = (double *)gIn2;
+        double *p3 = (double *)gIn3;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+            p2[j] = DoubleFromUInt32(genrand_int32(d));
+            p3[j] = DoubleFromUInt32(genrand_int32(d));
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
+            return error;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeof( cl_double ) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;    // bufferSize / vectorSize  rounded up
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 );
+    vlog( "\n" );
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+    return error;
diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp
deleted file mode 100644
index 606fdc5..0000000
--- a/test_conformance/math_brute_force/ternary_double.cpp
+++ /dev/null
@@ -1,740 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-#define FLUSHED 1
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2,  __global double",
-                        sizeNames[vectorSize],
-                        "* in3 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], in3[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global double* in2, "
-        "__global double* in3)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
-        "       double3 d2 = vload3( 0, in3 + 3 * i );\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, d2 );\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       double3 d1;\n"
-        "       double3 d2;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               d2 = (double3)( in3[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, d2 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-// A table of more difficult cases to get right
-static const double specialValues[] = {
-    -NAN,
-    -DBL_MAX,
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63),
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
-    -3.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51),
-    -2.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51),
-    -2.0,
-    MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52),
-    -1.5,
-    MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    -1.0,
-    MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
-    -DBL_MIN,
-    MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
-    -0.0,
-    +NAN,
-    +DBL_MAX,
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12),
-    MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
-    MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63),
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
-    +3.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51),
-    +2.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51),
-    +2.0,
-    MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
-    +1.5,
-    MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),
-    MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
-    +1.0,
-    MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
-    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
-    +DBL_MIN,
-    MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
-    MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
-    +0.0,
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
-                                         bool relaxedMode)
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int ftz = f->ftz || gForceFTZ;
-    double maxErrorVal = 0.0f;
-    double maxErrorVal2 = 0.0f;
-    double maxErrorVal3 = 0.0f;
-    uint64_t step = getTestStep(sizeof(double), BUFFER_SIZE);
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    Force64BitFPUPrecision();
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        double *p = (double *)gIn;
-        double *p2 = (double *)gIn2;
-        double *p3 = (double *)gIn3;
-        size_t idx = 0;
-        if (i == 0)
-        { // test edge cases
-            uint32_t x, y, z;
-            x = y = z = 0;
-            for (; idx < BUFFER_SIZE / sizeof(double); idx++)
-            {
-                p[idx] = specialValues[x];
-                p2[idx] = specialValues[y];
-                p3[idx] = specialValues[z];
-                if (++x >= specialValuesCount)
-                {
-                    x = 0;
-                    if (++y >= specialValuesCount)
-                    {
-                        y = 0;
-                        if (++z >= specialValuesCount) break;
-                    }
-                }
-            }
-            if (idx == BUFFER_SIZE / sizeof(double))
-                vlog_error("Test Error: not all special cases tested!\n");
-        }
-        for (; idx < BUFFER_SIZE / sizeof(double); idx++)
-        {
-            p[idx] = DoubleFromUInt32(genrand_int32(d));
-            p2[idx] = DoubleFromUInt32(genrand_int32(d));
-            p3[idx] = DoubleFromUInt32(genrand_int32(d));
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
-            return error;
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn3, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
-            return error;
-        }
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeof(cl_double) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
-                                        &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-        // Calculate the correctly rounded reference result
-        double *r = (double *)gOut_Ref;
-        double *s = (double *)gIn;
-        double *s2 = (double *)gIn2;
-        double *s3 = (double *)gIn3;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-            r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-        if (gSkipCorrectnessTesting) break;
-        // Verify data
-        uint64_t *t = (uint64_t *)gOut_Ref;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint64_t *q = (uint64_t *)(gOut[k]);
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j])
-                {
-                    double test = ((double *)q)[j];
-                    long double correct = f->dfunc.f_fff(s[j], s2[j], s3[j]);
-                    float err = Bruteforce_Ulp_Error_Double(test, correct);
-                    int fail = !(fabsf(err) <= f->double_ulps);
-                    if (fail && ftz)
-                    {
-                        // retry per section
-                        if (IsDoubleSubnormal(correct))
-                        { // look at me,
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-                        // retry per section
-                        if (fail && IsDoubleSubnormal(s[j]))
-                        { // look at me,
-                            long double correct2 =
-                                f->dfunc.f_fff(0.0, s2[j], s3[j]);
-                            long double correct3 =
-                                f->dfunc.f_fff(-0.0, s2[j], s3[j]);
-                            float err2 =
-                                Bruteforce_Ulp_Error_Double(test, correct2);
-                            float err3 =
-                                Bruteforce_Ulp_Error_Double(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= f->double_ulps))
-                                    && (!(fabsf(err3) <= f->double_ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            // retry per section
-                            if (IsDoubleResultSubnormal(correct2,
-                                                        f->double_ulps)
-                                || IsDoubleResultSubnormal(correct3,
-                                                           f->double_ulps))
-                            { // look at me now,
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                            // try with first two args as zero
-                            if (IsDoubleSubnormal(s2[j]))
-                            { // its fun to have fun,
-                                correct2 = f->dfunc.f_fff(0.0, 0.0, s3[j]);
-                                correct3 = f->dfunc.f_fff(-0.0, 0.0, s3[j]);
-                                long double correct4 =
-                                    f->dfunc.f_fff(0.0, -0.0, s3[j]);
-                                long double correct5 =
-                                    f->dfunc.f_fff(-0.0, -0.0, s3[j]);
-                                err2 =
-                                    Bruteforce_Ulp_Error_Double(test, correct2);
-                                err3 =
-                                    Bruteforce_Ulp_Error_Double(test, correct3);
-                                float err4 =
-                                    Bruteforce_Ulp_Error_Double(test, correct4);
-                                float err5 =
-                                    Bruteforce_Ulp_Error_Double(test, correct5);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= f->double_ulps))
-                                        && (!(fabsf(err3) <= f->double_ulps))
-                                        && (!(fabsf(err4) <= f->double_ulps))
-                                        && (!(fabsf(err5) <= f->double_ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                if (fabsf(err4) < fabsf(err)) err = err4;
-                                if (fabsf(err5) < fabsf(err)) err = err5;
-                                // retry per section
-                                if (IsDoubleResultSubnormal(correct2,
-                                                            f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct3,
-                                                               f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct4,
-                                                               f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct5,
-                                                               f->double_ulps))
-                                {
-                                    fail = fail && (test != 0.0f);
-                                    if (!fail) err = 0.0f;
-                                }
-                                if (IsDoubleSubnormal(s3[j]))
-                                { // but you have to know how!
-                                    correct2 = f->dfunc.f_fff(0.0, 0.0, 0.0f);
-                                    correct3 = f->dfunc.f_fff(-0.0, 0.0, 0.0f);
-                                    correct4 = f->dfunc.f_fff(0.0, -0.0, 0.0f);
-                                    correct5 = f->dfunc.f_fff(-0.0, -0.0, 0.0f);
-                                    long double correct6 =
-                                        f->dfunc.f_fff(0.0, 0.0, -0.0f);
-                                    long double correct7 =
-                                        f->dfunc.f_fff(-0.0, 0.0, -0.0f);
-                                    long double correct8 =
-                                        f->dfunc.f_fff(0.0, -0.0, -0.0f);
-                                    long double correct9 =
-                                        f->dfunc.f_fff(-0.0, -0.0, -0.0f);
-                                    err2 = Bruteforce_Ulp_Error_Double(
-                                        test, correct2);
-                                    err3 = Bruteforce_Ulp_Error_Double(
-                                        test, correct3);
-                                    err4 = Bruteforce_Ulp_Error_Double(
-                                        test, correct4);
-                                    err5 = Bruteforce_Ulp_Error_Double(
-                                        test, correct5);
-                                    float err6 = Bruteforce_Ulp_Error_Double(
-                                        test, correct6);
-                                    float err7 = Bruteforce_Ulp_Error_Double(
-                                        test, correct7);
-                                    float err8 = Bruteforce_Ulp_Error_Double(
-                                        test, correct8);
-                                    float err9 = Bruteforce_Ulp_Error_Double(
-                                        test, correct9);
-                                    fail = fail
-                                        && ((!(fabsf(err2) <= f->double_ulps))
-                                            && (!(fabsf(err3)
-                                                  <= f->double_ulps))
-                                            && (!(fabsf(err4)
-                                                  <= f->double_ulps))
-                                            && (!(fabsf(err5)
-                                                  <= f->double_ulps))
-                                            && (!(fabsf(err5)
-                                                  <= f->double_ulps))
-                                            && (!(fabsf(err6)
-                                                  <= f->double_ulps))
-                                            && (!(fabsf(err7)
-                                                  <= f->double_ulps))
-                                            && (!(fabsf(err8)
-                                                  <= f->double_ulps)));
-                                    if (fabsf(err2) < fabsf(err)) err = err2;
-                                    if (fabsf(err3) < fabsf(err)) err = err3;
-                                    if (fabsf(err4) < fabsf(err)) err = err4;
-                                    if (fabsf(err5) < fabsf(err)) err = err5;
-                                    if (fabsf(err6) < fabsf(err)) err = err6;
-                                    if (fabsf(err7) < fabsf(err)) err = err7;
-                                    if (fabsf(err8) < fabsf(err)) err = err8;
-                                    if (fabsf(err9) < fabsf(err)) err = err9;
-                                    // retry per section
-                                    if (IsDoubleResultSubnormal(correct2,
-                                                                f->double_ulps)
-                                        || IsDoubleResultSubnormal(
-                                            correct3, f->double_ulps)
-                                        || IsDoubleResultSubnormal(
-                                            correct4, f->double_ulps)
-                                        || IsDoubleResultSubnormal(
-                                            correct5, f->double_ulps)
-                                        || IsDoubleResultSubnormal(
-                                            correct6, f->double_ulps)
-                                        || IsDoubleResultSubnormal(
-                                            correct7, f->double_ulps)
-                                        || IsDoubleResultSubnormal(
-                                            correct8, f->double_ulps)
-                                        || IsDoubleResultSubnormal(
-                                            correct9, f->double_ulps))
-                                    {
-                                        fail = fail && (test != 0.0f);
-                                        if (!fail) err = 0.0f;
-                                    }
-                                }
-                            }
-                            else if (IsDoubleSubnormal(s3[j]))
-                            {
-                                correct2 = f->dfunc.f_fff(0.0, s2[j], 0.0);
-                                correct3 = f->dfunc.f_fff(-0.0, s2[j], 0.0);
-                                long double correct4 =
-                                    f->dfunc.f_fff(0.0, s2[j], -0.0);
-                                long double correct5 =
-                                    f->dfunc.f_fff(-0.0, s2[j], -0.0);
-                                err2 =
-                                    Bruteforce_Ulp_Error_Double(test, correct2);
-                                err3 =
-                                    Bruteforce_Ulp_Error_Double(test, correct3);
-                                float err4 =
-                                    Bruteforce_Ulp_Error_Double(test, correct4);
-                                float err5 =
-                                    Bruteforce_Ulp_Error_Double(test, correct5);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= f->double_ulps))
-                                        && (!(fabsf(err3) <= f->double_ulps))
-                                        && (!(fabsf(err4) <= f->double_ulps))
-                                        && (!(fabsf(err5) <= f->double_ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                if (fabsf(err4) < fabsf(err)) err = err4;
-                                if (fabsf(err5) < fabsf(err)) err = err5;
-                                // retry per section
-                                if (IsDoubleResultSubnormal(correct2,
-                                                            f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct3,
-                                                               f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct4,
-                                                               f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct5,
-                                                               f->double_ulps))
-                                {
-                                    fail = fail && (test != 0.0f);
-                                    if (!fail) err = 0.0f;
-                                }
-                            }
-                        }
-                        else if (fail && IsDoubleSubnormal(s2[j]))
-                        {
-                            long double correct2 =
-                                f->dfunc.f_fff(s[j], 0.0, s3[j]);
-                            long double correct3 =
-                                f->dfunc.f_fff(s[j], -0.0, s3[j]);
-                            float err2 =
-                                Bruteforce_Ulp_Error_Double(test, correct2);
-                            float err3 =
-                                Bruteforce_Ulp_Error_Double(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= f->double_ulps))
-                                    && (!(fabsf(err3) <= f->double_ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            // retry per section
-                            if (IsDoubleResultSubnormal(correct2,
-                                                        f->double_ulps)
-                                || IsDoubleResultSubnormal(correct3,
-                                                           f->double_ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                            // try with second two args as zero
-                            if (IsDoubleSubnormal(s3[j]))
-                            {
-                                correct2 = f->dfunc.f_fff(s[j], 0.0, 0.0);
-                                correct3 = f->dfunc.f_fff(s[j], -0.0, 0.0);
-                                long double correct4 =
-                                    f->dfunc.f_fff(s[j], 0.0, -0.0);
-                                long double correct5 =
-                                    f->dfunc.f_fff(s[j], -0.0, -0.0);
-                                err2 =
-                                    Bruteforce_Ulp_Error_Double(test, correct2);
-                                err3 =
-                                    Bruteforce_Ulp_Error_Double(test, correct3);
-                                float err4 =
-                                    Bruteforce_Ulp_Error_Double(test, correct4);
-                                float err5 =
-                                    Bruteforce_Ulp_Error_Double(test, correct5);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= f->double_ulps))
-                                        && (!(fabsf(err3) <= f->double_ulps))
-                                        && (!(fabsf(err4) <= f->double_ulps))
-                                        && (!(fabsf(err5) <= f->double_ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                if (fabsf(err4) < fabsf(err)) err = err4;
-                                if (fabsf(err5) < fabsf(err)) err = err5;
-                                // retry per section
-                                if (IsDoubleResultSubnormal(correct2,
-                                                            f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct3,
-                                                               f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct4,
-                                                               f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct5,
-                                                               f->double_ulps))
-                                {
-                                    fail = fail && (test != 0.0f);
-                                    if (!fail) err = 0.0f;
-                                }
-                            }
-                        }
-                        else if (fail && IsDoubleSubnormal(s3[j]))
-                        {
-                            long double correct2 =
-                                f->dfunc.f_fff(s[j], s2[j], 0.0);
-                            long double correct3 =
-                                f->dfunc.f_fff(s[j], s2[j], -0.0);
-                            float err2 =
-                                Bruteforce_Ulp_Error_Double(test, correct2);
-                            float err3 =
-                                Bruteforce_Ulp_Error_Double(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= f->double_ulps))
-                                    && (!(fabsf(err3) <= f->double_ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            // retry per section
-                            if (IsDoubleResultSubnormal(correct2,
-                                                        f->double_ulps)
-                                || IsDoubleResultSubnormal(correct3,
-                                                           f->double_ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                    if (fabsf(err) > maxError)
-                    {
-                        maxError = fabsf(err);
-                        maxErrorVal = s[j];
-                        maxErrorVal2 = s2[j];
-                        maxErrorVal3 = s3[j];
-                    }
-                    if (fail)
-                    {
-                        vlog_error("\nERROR: %sD%s: %f ulp error at {%.13la, "
-                                   "%.13la, %.13la}: *%.13la vs. %.13la\n",
-                                   f->name, sizeNames[k], err, s[j], s2[j],
-                                   s3[j], ((double *)gOut_Ref)[j], test);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
-             maxErrorVal3);
-    }
-    vlog("\n");
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp
deleted file mode 100644
index e52c0a0..0000000
--- a/test_conformance/math_brute_force/ternary_float.cpp
+++ /dev/null
@@ -1,875 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-#define FLUSHED 1
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2,  __global float",
-                        sizeNames[vectorSize],
-                        "* in3 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], in3[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global float* in2, "
-        "__global float* in3)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       float3 f2 = vload3( 0, in3 + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, f2 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       float3 f2;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               f2 = (float3)( in3[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, f2 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-// A table of more difficult cases to get right
-static const float specialValues[] = {
-    -NAN,
-    -FLT_MAX,
-    MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
-    MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
-    MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
-    MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
-    MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
-    -3.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
-    -2.5f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
-    -2.0f,
-    MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
-    -1.75f,
-    -1.5f,
-    -1.25f,
-    MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
-    MAKE_HEX_FLOAT(-0x1.003p0f, -0x1003000L, -24),
-    -MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24),
-    -1.0f,
-    MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
-    -FLT_MIN,
-    MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
-    MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
-    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
-    MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
-    MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
-    MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
-    -0.0f,
-    +NAN,
-    +FLT_MAX,
-    MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
-    MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
-    MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
-    MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
-    MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
-    MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
-    +3.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
-    2.5f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
-    +2.0f,
-    MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
-    1.75f,
-    1.5f,
-    1.25f,
-    MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
-    MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
-    MAKE_HEX_FLOAT(0x1.003p0f, 0x1003000L, -24),
-    +MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24),
-    +1.0f,
-    MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
-    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
-    +FLT_MIN,
-    MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
-    MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
-    MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
-    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
-    MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
-    MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
-    MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
-    +0.0f,
-static const size_t specialValuesCount =
-    sizeof(specialValues) / sizeof(specialValues[0]);
-int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
-    int error;
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    float maxErrorVal = 0.0f;
-    float maxErrorVal2 = 0.0f;
-    float maxErrorVal3 = 0.0f;
-    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
-    cl_uchar overflow[BUFFER_SIZE / sizeof(float)];
-    float float_ulps;
-    if (gIsEmbedded)
-        float_ulps = f->float_embedded_ulps;
-    else
-        float_ulps = f->float_ulps;
-    int skipNanInf = (0 == strcmp("fma", f->nameInCode)) && !gInfNanSupport;
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        cl_uint *p = (cl_uint *)gIn;
-        cl_uint *p2 = (cl_uint *)gIn2;
-        cl_uint *p3 = (cl_uint *)gIn3;
-        size_t idx = 0;
-        if (i == 0)
-        { // test edge cases
-            float *fp = (float *)gIn;
-            float *fp2 = (float *)gIn2;
-            float *fp3 = (float *)gIn3;
-            uint32_t x, y, z;
-            x = y = z = 0;
-            for (; idx < BUFFER_SIZE / sizeof(float); idx++)
-            {
-                fp[idx] = specialValues[x];
-                fp2[idx] = specialValues[y];
-                fp3[idx] = specialValues[z];
-                if (++x >= specialValuesCount)
-                {
-                    x = 0;
-                    if (++y >= specialValuesCount)
-                    {
-                        y = 0;
-                        if (++z >= specialValuesCount) break;
-                    }
-                }
-            }
-            if (idx == BUFFER_SIZE / sizeof(float))
-                vlog_error("Test Error: not all special cases tested!\n");
-        }
-        for (; idx < BUFFER_SIZE / sizeof(float); idx++)
-        {
-            p[idx] = genrand_int32(d);
-            p2[idx] = genrand_int32(d);
-            p3[idx] = genrand_int32(d);
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
-            return error;
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn3, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
-            return error;
-        }
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
-                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
-                                        &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-        // Calculate the correctly rounded reference result
-        float *r = (float *)gOut_Ref;
-        float *s = (float *)gIn;
-        float *s2 = (float *)gIn2;
-        float *s3 = (float *)gIn3;
-        if (skipNanInf)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            {
-                feclearexcept(FE_OVERFLOW);
-                r[j] =
-                    (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
-                overflow[j] =
-                    FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
-            }
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                r[j] =
-                    (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
-        }
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-        if (gSkipCorrectnessTesting) break;
-        // Verify data
-        uint32_t *t = (uint32_t *)gOut_Ref;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint32_t *q = (uint32_t *)(gOut[k]);
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j])
-                {
-                    float err;
-                    int fail;
-                    float test = ((float *)q)[j];
-                    float correct =
-                        f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
-                    // Per section 10 paragraph 6, accept any result if an input
-                    // or output is a infinity or NaN or overflow
-                    if (skipNanInf)
-                    {
-                        if (overflow[j] || IsFloatInfinity(correct)
-                            || IsFloatNaN(correct) || IsFloatInfinity(s[j])
-                            || IsFloatNaN(s[j]) || IsFloatInfinity(s2[j])
-                            || IsFloatNaN(s2[j]) || IsFloatInfinity(s3[j])
-                            || IsFloatNaN(s3[j]))
-                            continue;
-                    }
-                    err = Ulp_Error(test, correct);
-                    fail = !(fabsf(err) <= float_ulps);
-                    if (fail && ftz)
-                    {
-                        float correct2, err2;
-                        // retry per section  with flushing on
-                        if (0.0f == test
-                            && 0.0f
-                                == f->func.f_fma(s[j], s2[j], s3[j], FLUSHED))
-                        {
-                            fail = 0;
-                            err = 0.0f;
-                        }
-                        // retry per section
-                        if (fail && IsFloatSubnormal(s[j]))
-                        { // look at me,
-                            float err3, correct3;
-                            if (skipNanInf) feclearexcept(FE_OVERFLOW);
-                            correct2 = f->func.f_fma(0.0f, s2[j], s3[j],
-                                                     CORRECTLY_ROUNDED);
-                            correct3 = f->func.f_fma(-0.0f, s2[j], s3[j],
-                                                     CORRECTLY_ROUNDED);
-                            if (skipNanInf)
-                            {
-                                if (fetestexcept(FE_OVERFLOW)) continue;
-                                // Note: no double rounding here.  Reference
-                                // functions calculate in single precision.
-                                if (IsFloatInfinity(correct2)
-                                    || IsFloatNaN(correct2)
-                                    || IsFloatInfinity(correct3)
-                                    || IsFloatNaN(correct3))
-                                    continue;
-                            }
-                            err2 = Ulp_Error(test, correct2);
-                            err3 = Ulp_Error(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= float_ulps))
-                                    && (!(fabsf(err3) <= float_ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            // retry per section
-                            if (0.0f == test
-                                && (0.0f
-                                        == f->func.f_fma(0.0f, s2[j], s3[j],
-                                                         FLUSHED)
-                                    || 0.0f
-                                        == f->func.f_fma(-0.0f, s2[j], s3[j],
-                                                         FLUSHED)))
-                            {
-                                fail = 0;
-                                err = 0.0f;
-                            }
-                            // try with first two args as zero
-                            if (IsFloatSubnormal(s2[j]))
-                            { // its fun to have fun,
-                                double correct4, correct5;
-                                float err4, err5;
-                                if (skipNanInf) feclearexcept(FE_OVERFLOW);
-                                correct2 = f->func.f_fma(0.0f, 0.0f, s3[j],
-                                                         CORRECTLY_ROUNDED);
-                                correct3 = f->func.f_fma(-0.0f, 0.0f, s3[j],
-                                                         CORRECTLY_ROUNDED);
-                                correct4 = f->func.f_fma(0.0f, -0.0f, s3[j],
-                                                         CORRECTLY_ROUNDED);
-                                correct5 = f->func.f_fma(-0.0f, -0.0f, s3[j],
-                                                         CORRECTLY_ROUNDED);
-                                // Per section 10 paragraph 6, accept any result
-                                // if an input or output is a infinity or NaN or
-                                // overflow
-                                if (!gInfNanSupport)
-                                {
-                                    if (fetestexcept(FE_OVERFLOW)) continue;
-                                    // Note: no double rounding here.  Reference
-                                    // functions calculate in single precision.
-                                    if (IsFloatInfinity(correct2)
-                                        || IsFloatNaN(correct2)
-                                        || IsFloatInfinity(correct3)
-                                        || IsFloatNaN(correct3)
-                                        || IsFloatInfinity(correct4)
-                                        || IsFloatNaN(correct4)
-                                        || IsFloatInfinity(correct5)
-                                        || IsFloatNaN(correct5))
-                                        continue;
-                                }
-                                err2 = Ulp_Error(test, correct2);
-                                err3 = Ulp_Error(test, correct3);
-                                err4 = Ulp_Error(test, correct4);
-                                err5 = Ulp_Error(test, correct5);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= float_ulps))
-                                        && (!(fabsf(err3) <= float_ulps))
-                                        && (!(fabsf(err4) <= float_ulps))
-                                        && (!(fabsf(err5) <= float_ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                if (fabsf(err4) < fabsf(err)) err = err4;
-                                if (fabsf(err5) < fabsf(err)) err = err5;
-                                // retry per section
-                                if (0.0f == test
-                                    && (0.0f
-                                            == f->func.f_fma(0.0f, 0.0f, s3[j],
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(-0.0f, 0.0f, s3[j],
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(0.0f, -0.0f, s3[j],
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(-0.0f, -0.0f,
-                                                             s3[j], FLUSHED)))
-                                {
-                                    fail = 0;
-                                    err = 0.0f;
-                                }
-                                if (IsFloatSubnormal(s3[j]))
-                                {
-                                    if (test == 0.0f) // 0*0+0 is 0
-                                    {
-                                        fail = 0;
-                                        err = 0.0f;
-                                    }
-                                }
-                            }
-                            else if (IsFloatSubnormal(s3[j]))
-                            {
-                                double correct4, correct5;
-                                float err4, err5;
-                                if (skipNanInf) feclearexcept(FE_OVERFLOW);
-                                correct2 = f->func.f_fma(0.0f, s2[j], 0.0f,
-                                                         CORRECTLY_ROUNDED);
-                                correct3 = f->func.f_fma(-0.0f, s2[j], 0.0f,
-                                                         CORRECTLY_ROUNDED);
-                                correct4 = f->func.f_fma(0.0f, s2[j], -0.0f,
-                                                         CORRECTLY_ROUNDED);
-                                correct5 = f->func.f_fma(-0.0f, s2[j], -0.0f,
-                                                         CORRECTLY_ROUNDED);
-                                // Per section 10 paragraph 6, accept any result
-                                // if an input or output is a infinity or NaN or
-                                // overflow
-                                if (!gInfNanSupport)
-                                {
-                                    if (fetestexcept(FE_OVERFLOW)) continue;
-                                    // Note: no double rounding here.  Reference
-                                    // functions calculate in single precision.
-                                    if (IsFloatInfinity(correct2)
-                                        || IsFloatNaN(correct2)
-                                        || IsFloatInfinity(correct3)
-                                        || IsFloatNaN(correct3)
-                                        || IsFloatInfinity(correct4)
-                                        || IsFloatNaN(correct4)
-                                        || IsFloatInfinity(correct5)
-                                        || IsFloatNaN(correct5))
-                                        continue;
-                                }
-                                err2 = Ulp_Error(test, correct2);
-                                err3 = Ulp_Error(test, correct3);
-                                err4 = Ulp_Error(test, correct4);
-                                err5 = Ulp_Error(test, correct5);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= float_ulps))
-                                        && (!(fabsf(err3) <= float_ulps))
-                                        && (!(fabsf(err4) <= float_ulps))
-                                        && (!(fabsf(err5) <= float_ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                if (fabsf(err4) < fabsf(err)) err = err4;
-                                if (fabsf(err5) < fabsf(err)) err = err5;
-                                // retry per section
-                                if (0.0f == test
-                                    && (0.0f
-                                            == f->func.f_fma(0.0f, s2[j], 0.0f,
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(-0.0f, s2[j], 0.0f,
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(0.0f, s2[j], -0.0f,
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(-0.0f, s2[j],
-                                                             -0.0f, FLUSHED)))
-                                {
-                                    fail = 0;
-                                    err = 0.0f;
-                                }
-                            }
-                        }
-                        else if (fail && IsFloatSubnormal(s2[j]))
-                        {
-                            double correct2, correct3;
-                            float err2, err3;
-                            if (skipNanInf) feclearexcept(FE_OVERFLOW);
-                            correct2 = f->func.f_fma(s[j], 0.0f, s3[j],
-                                                     CORRECTLY_ROUNDED);
-                            correct3 = f->func.f_fma(s[j], -0.0f, s3[j],
-                                                     CORRECTLY_ROUNDED);
-                            if (skipNanInf)
-                            {
-                                if (fetestexcept(FE_OVERFLOW)) continue;
-                                // Note: no double rounding here.  Reference
-                                // functions calculate in single precision.
-                                if (IsFloatInfinity(correct2)
-                                    || IsFloatNaN(correct2)
-                                    || IsFloatInfinity(correct3)
-                                    || IsFloatNaN(correct3))
-                                    continue;
-                            }
-                            err2 = Ulp_Error(test, correct2);
-                            err3 = Ulp_Error(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= float_ulps))
-                                    && (!(fabsf(err3) <= float_ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            // retry per section
-                            if (0.0f == test
-                                && (0.0f
-                                        == f->func.f_fma(s[j], 0.0f, s3[j],
-                                                         FLUSHED)
-                                    || 0.0f
-                                        == f->func.f_fma(s[j], -0.0f, s3[j],
-                                                         FLUSHED)))
-                            {
-                                fail = 0;
-                                err = 0.0f;
-                            }
-                            // try with second two args as zero
-                            if (IsFloatSubnormal(s3[j]))
-                            {
-                                double correct4, correct5;
-                                float err4, err5;
-                                if (skipNanInf) feclearexcept(FE_OVERFLOW);
-                                correct2 = f->func.f_fma(s[j], 0.0f, 0.0f,
-                                                         CORRECTLY_ROUNDED);
-                                correct3 = f->func.f_fma(s[j], -0.0f, 0.0f,
-                                                         CORRECTLY_ROUNDED);
-                                correct4 = f->func.f_fma(s[j], 0.0f, -0.0f,
-                                                         CORRECTLY_ROUNDED);
-                                correct5 = f->func.f_fma(s[j], -0.0f, -0.0f,
-                                                         CORRECTLY_ROUNDED);
-                                // Per section 10 paragraph 6, accept any result
-                                // if an input or output is a infinity or NaN or
-                                // overflow
-                                if (!gInfNanSupport)
-                                {
-                                    if (fetestexcept(FE_OVERFLOW)) continue;
-                                    // Note: no double rounding here.  Reference
-                                    // functions calculate in single precision.
-                                    if (IsFloatInfinity(correct2)
-                                        || IsFloatNaN(correct2)
-                                        || IsFloatInfinity(correct3)
-                                        || IsFloatNaN(correct3)
-                                        || IsFloatInfinity(correct4)
-                                        || IsFloatNaN(correct4)
-                                        || IsFloatInfinity(correct5)
-                                        || IsFloatNaN(correct5))
-                                        continue;
-                                }
-                                err2 = Ulp_Error(test, correct2);
-                                err3 = Ulp_Error(test, correct3);
-                                err4 = Ulp_Error(test, correct4);
-                                err5 = Ulp_Error(test, correct5);
-                                fail = fail
-                                    && ((!(fabsf(err2) <= float_ulps))
-                                        && (!(fabsf(err3) <= float_ulps))
-                                        && (!(fabsf(err4) <= float_ulps))
-                                        && (!(fabsf(err5) <= float_ulps)));
-                                if (fabsf(err2) < fabsf(err)) err = err2;
-                                if (fabsf(err3) < fabsf(err)) err = err3;
-                                if (fabsf(err4) < fabsf(err)) err = err4;
-                                if (fabsf(err5) < fabsf(err)) err = err5;
-                                // retry per section
-                                if (0.0f == test
-                                    && (0.0f
-                                            == f->func.f_fma(s[j], 0.0f, 0.0f,
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(s[j], -0.0f, 0.0f,
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(s[j], 0.0f, -0.0f,
-                                                             FLUSHED)
-                                        || 0.0f
-                                            == f->func.f_fma(s[j], -0.0f, -0.0f,
-                                                             FLUSHED)))
-                                {
-                                    fail = 0;
-                                    err = 0.0f;
-                                }
-                            }
-                        }
-                        else if (fail && IsFloatSubnormal(s3[j]))
-                        {
-                            double correct2, correct3;
-                            float err2, err3;
-                            if (skipNanInf) feclearexcept(FE_OVERFLOW);
-                            correct2 = f->func.f_fma(s[j], s2[j], 0.0f,
-                                                     CORRECTLY_ROUNDED);
-                            correct3 = f->func.f_fma(s[j], s2[j], -0.0f,
-                                                     CORRECTLY_ROUNDED);
-                            if (skipNanInf)
-                            {
-                                if (fetestexcept(FE_OVERFLOW)) continue;
-                                // Note: no double rounding here.  Reference
-                                // functions calculate in single precision.
-                                if (IsFloatInfinity(correct2)
-                                    || IsFloatNaN(correct2)
-                                    || IsFloatInfinity(correct3)
-                                    || IsFloatNaN(correct3))
-                                    continue;
-                            }
-                            err2 = Ulp_Error(test, correct2);
-                            err3 = Ulp_Error(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= float_ulps))
-                                    && (!(fabsf(err3) <= float_ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            // retry per section
-                            if (0.0f == test
-                                && (0.0f
-                                        == f->func.f_fma(s[j], s2[j], 0.0f,
-                                                         FLUSHED)
-                                    || 0.0f
-                                        == f->func.f_fma(s[j], s2[j], -0.0f,
-                                                         FLUSHED)))
-                            {
-                                fail = 0;
-                                err = 0.0f;
-                            }
-                        }
-                    }
-                    if (fabsf(err) > maxError)
-                    {
-                        maxError = fabsf(err);
-                        maxErrorVal = s[j];
-                        maxErrorVal2 = s2[j];
-                        maxErrorVal3 = s3[j];
-                    }
-                    if (fail)
-                    {
-                        vlog_error(
-                            "\nERROR: %s%s: %f ulp error at {%a, %a, %a} "
-                            "({0x%8.8x, 0x%8.8x, 0x%8.8x}): *%a vs. %a\n",
-                            f->name, sizeNames[k], err, s[j], s2[j], s3[j],
-                            ((cl_uint *)s)[j], ((cl_uint *)s2)[j],
-                            ((cl_uint *)s3)[j], ((float *)gOut_Ref)[j], test);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10u bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
-             maxErrorVal3);
-    }
-    vlog("\n");
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/test_functions.h b/test_conformance/math_brute_force/test_functions.h
deleted file mode 100644
index 78aef9c..0000000
--- a/test_conformance/math_brute_force/test_functions.h
+++ /dev/null
@@ -1,118 +0,0 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-// float foo(float)
-int TestFunc_Float_Float(const Func *f, MTdata, bool relaxedMode);
-// double foo(double)
-int TestFunc_Double_Double(const Func *f, MTdata, bool relaxedMode);
-// int foo(float)
-int TestFunc_Int_Float(const Func *f, MTdata, bool relaxedMode);
-// int foo(double)
-int TestFunc_Int_Double(const Func *f, MTdata, bool relaxedMode);
-// float foo(uint)
-int TestFunc_Float_UInt(const Func *f, MTdata, bool relaxedMode);
-// double foo(ulong)
-int TestFunc_Double_ULong(const Func *f, MTdata, bool relaxedMode);
-// Returns {0, 1} for scalar and {0, -1} for vector.
-// int foo(float)
-int TestMacro_Int_Float(const Func *f, MTdata, bool relaxedMode);
-// Returns {0, 1} for scalar and {0, -1} for vector.
-// int foo(double)
-int TestMacro_Int_Double(const Func *f, MTdata, bool relaxedMode);
-// float foo(float, float)
-int TestFunc_Float_Float_Float(const Func *f, MTdata, bool relaxedMode);
-// double foo(double, double)
-int TestFunc_Double_Double_Double(const Func *f, MTdata, bool relaxedMode);
-// Special handling for nextafter.
-// float foo(float, float)
-int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata,
-                                         bool relaxedMode);
-// Special handling for nextafter.
-// double foo(double, double)
-int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata,
-                                            bool relaxedMode);
-// float op float
-int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata,
-                                        bool relaxedMode);
-// double op double
-int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata,
-                                           bool relaxedMode);
-// float foo(float, int)
-int TestFunc_Float_Float_Int(const Func *f, MTdata, bool relaxedMode);
-// double foo(double, int)
-int TestFunc_Double_Double_Int(const Func *f, MTdata, bool relaxedMode);
-// Returns {0, 1} for scalar and {0, -1} for vector.
-// int foo(float, float)
-int TestMacro_Int_Float_Float(const Func *f, MTdata, bool relaxedMode);
-// Returns {0, 1} for scalar and {0, -1} for vector.
-// int foo(double, double)
-int TestMacro_Int_Double_Double(const Func *f, MTdata, bool relaxedMode);
-// float foo(float, float, float)
-int TestFunc_Float_Float_Float_Float(const Func *f, MTdata, bool relaxedMode);
-// double foo(double, double, double)
-int TestFunc_Double_Double_Double_Double(const Func *f, MTdata,
-                                         bool relaxedMode);
-// float foo(float, float*)
-int TestFunc_Float2_Float(const Func *f, MTdata, bool relaxedMode);
-// double foo(double, double*)
-int TestFunc_Double2_Double(const Func *f, MTdata, bool relaxedMode);
-// float foo(float, int*)
-int TestFunc_FloatI_Float(const Func *f, MTdata, bool relaxedMode);
-// double foo(double, int*)
-int TestFunc_DoubleI_Double(const Func *f, MTdata, bool relaxedMode);
-// float foo(float, float, int*)
-int TestFunc_FloatI_Float_Float(const Func *f, MTdata, bool relaxedMode);
-// double foo(double, double, int*)
-int TestFunc_DoubleI_Double_Double(const Func *f, MTdata, bool relaxedMode);
-// Special handling for mad.
-// float mad(float, float, float)
-int TestFunc_mad_Float(const Func *f, MTdata, bool relaxedMode);
-// Special handling for mad.
-// double mad(double, double, double)
-int TestFunc_mad_Double(const Func *f, MTdata, bool relaxedMode);
diff --git a/test_conformance/math_brute_force/unary.cpp b/test_conformance/math_brute_force/unary.cpp
new file mode 100644
index 0000000..a979d07
--- /dev/null
+++ b/test_conformance/math_brute_force/unary.cpp
@@ -0,0 +1,1209 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "Utility.h"
+#include <string.h>
+#include "FunctionList.h"
+#if defined( __APPLE__ )
+    #include <sys/time.h>
+int TestFunc_Float_Float(const Func *f, MTdata);
+int TestFunc_Double_Double(const Func *f, MTdata);
+extern const vtbl _unary = { "unary", TestFunc_Float_Float,
+                             TestFunc_Double_Double };
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
+static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+    const char *c[] = {
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i] );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       float3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       f0 = ", name, "( f0 );\n"
+                            "       vstore3( f0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       float3 f0;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       f0 = ", name, "( f0 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = f0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = f0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
+    const char *c[] = {     "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i] );\n"
+                            "}\n"
+                        };
+    const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in)\n"
+                        "{\n"
+                        "   size_t i = get_global_id(0);\n"
+                        "   if( i + 1 < get_global_size(0) )\n"
+                        "   {\n"
+                        "       double3 f0 = vload3( 0, in + 3 * i );\n"
+                        "       f0 = ", name, "( f0 );\n"
+                        "       vstore3( f0, 0, out + 3*i );\n"
+                        "   }\n"
+                        "   else\n"
+                        "   {\n"
+                        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                        "       double3 f0;\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 1:\n"
+                        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
+                        "               break;\n"
+                        "           case 0:\n"
+                        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+                        "               break;\n"
+                        "       }\n"
+                        "       f0 = ", name, "( f0 );\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 0:\n"
+                        "               out[3*i+1] = f0.y; \n"
+                        "               // fall through\n"
+                        "           case 1:\n"
+                        "               out[3*i] = f0.x; \n"
+                        "               break;\n"
+                        "       }\n"
+                        "   }\n"
+                        "}\n"
+                    };
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
+typedef struct BuildKernelInfo
+    cl_uint     offset;            // the first vector size to build
+    cl_uint     kernel_count;
+    cl_kernel   **kernels;
+    cl_program  *programs;
+    const char  *nameInCode;
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i );
+// Thread specific data for a worker thread
+typedef struct ThreadInfo
+    cl_mem      inBuf;                              // input buffer for the thread (created as a sub-buffer of gInBuffer)
+    cl_mem      outBuf[ VECTOR_SIZE_COUNT ];        // output buffers for the thread, one per vector size index (sub-buffers of gOutBuffer[j])
+    float       maxError;                           // largest |err| recorded by this thread. Init to 0.
+    double      maxErrorValue;                      // input value at which maxError was recorded.  Init to 0.
+    cl_command_queue tQueue;                        // per thread command queue to improve performance
+typedef struct TestInfo    // shared, read-only description of one test run, passed (as void *) to every TestFloat / TestDouble job
+    size_t      subBufferSize;                      // Size of the sub-buffer in elements
+    const Func  *f;                                 // A pointer to the function info
+    cl_program  programs[ VECTOR_SIZE_COUNT ];      // programs for various vector sizes
+    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
+    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
+    cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
+    cl_uint     step;                               // step between each chunk and the next (subBufferSize * scale).
+    cl_uint     scale;                              // stride between individual test values
+    float       ulps;                               // max_allowed ulps
+    int         ftz;                                // non-zero if running in flush to zero mode
+    int         isRangeLimited;                     // 1 if the function is only to be evaluated over a range
+    float       half_sin_cos_tan_limit;             // when isRangeLimited: largest |result| accepted for finite inputs with magnitude above 2^16
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );
+int TestFunc_Float_Float(const Func *f, MTdata d)    // Tests the float->float function f: builds per-thread kernels, runs TestFloat for every job, optionally times the kernels; returns a cl_int status (0 on success). d seeds the random inputs used for timing.
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+    int skipTestingRelaxed = ( gTestFastRelaxed && strcmp(f->name,"tan") == 0 );
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode)
+    {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);    // enough jobs of `step` values each to span the 32-bit input range
+    }
+    test_info.f = f;
+    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
+    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) };    // thread i owns the i-th slice of the shared buffers
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+    }
+    // Check for special cases for unary float
+    test_info.isRangeLimited = 0;
+    test_info.half_sin_cos_tan_limit = 0;
+    if( 0 == strcmp( f->name, "half_sin") || 0 == strcmp( f->name, "half_cos") )
+    {
+        test_info.isRangeLimited = 1;
+        test_info.half_sin_cos_tan_limit = 1.0f + test_info.ulps * (FLT_EPSILON/2.0f);             // out of range results from finite inputs must be in [-1,1]
+    }
+    else if( 0 == strcmp( f->name, "half_tan"))
+    {
+        test_info.isRangeLimited = 1;
+        test_info.half_sin_cos_tan_limit = INFINITY;             // out of range result from finite inputs must be numeric
+    }
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))    // one build job per vector size
+            goto exit;
+    }
+    if( !gSkipCorrectnessTesting || skipTestingRelaxed)
+    {
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
+        // Accumulate the arithmetic errors
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            if( test_info.tinfo[i].maxError > maxError )
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+            }
+        }
+        if( error )
+            goto exit;
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+        if( skipTestingRelaxed )
+        {
+          vlog(" (rlx skip correctness testing)\n");
+          goto exit;
+        }
+    }
+    if( gMeasureTimes )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        if( strstr( f->name, "exp" ) || strstr( f->name, "sin" ) || strstr( f->name, "cos" ) || strstr( f->name, "tan" ) )
+            for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+                ((float*)p)[j] = (float) genrand_real1(d);
+        else if( strstr( f->name, "log" ) )
+            for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+                p[j] = genrand_int32(d) & 0x7fffffff;    // clear the sign bit so timing inputs are non-negative
+        else
+            for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
+                p[j] = genrand_int32(d);
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            goto exit;    // go through the common cleanup like every other failure path, so nothing created above is leaked
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError( test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double current_time = SubtractTime( endTime, startTime );
+                sum += current_time;
+                if( current_time < bestTime )
+                    bestTime = current_time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ %a", maxError, maxErrorVal );
+    vlog( "\n" );
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )    // cleanup: release programs and the per-thread kernels
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+        free( test_info.tinfo );
+    }
+    return error;
+static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
+    const TestInfo *job = (const TestInfo *) data;
+    size_t  buffer_elements = job->subBufferSize;
+    size_t  buffer_size = buffer_elements * sizeof( cl_float );
+    cl_uint scale = job->scale;
+    cl_uint base = job_id * (cl_uint) job->step;
+    ThreadInfo *tinfo = job->tinfo + thread_id;
+    float   ulps = job->ulps;
+    fptr    func = job->f->func;
+    const char * fname = job->f->name;
+    if ( gTestFastRelaxed  )
+    {
+        ulps = job->f->relaxed_error;
+        func = job->f->rfunc;
+    }
+    cl_uint j, k;
+    cl_int error;
+    int isRangeLimited = job->isRangeLimited;
+    float half_sin_cos_tan_limit = job->half_sin_cos_tan_limit;
+    int ftz = job->ftz;
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_uint  *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+    // Write the new values to the input array
+    cl_uint *p = (cl_uint*) gIn + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+      p[j] = base + j * scale;
+      if( gTestFastRelaxed )
+      {
+        float p_j = *(float *) &p[j];
+        if ( strcmp(fname,"sin")==0 || strcmp(fname,"cos")==0 )  //the domain of the function is [-pi,pi]
+        {
+          if( fabs(p_j) > M_PI )
+            p[j] = NAN;
+        }
+        if ( strcmp( fname, "reciprocal" ) == 0 )
+        {
+          if( fabs(p_j) > 0x7E800000 ) //the domain of the function is [2^-126,2^126]
+            p[j] = NAN;
+        }
+      }
+    }
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        return error;
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            return error;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            return error;
+        }
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error );
+            return error;
+        }
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            return error;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+    //Calculate the correctly rounded reference result
+    float *r = (float *)gOut_Ref + thread_id * buffer_elements;
+    float *s = (float *)p;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = (float) func.f_f( s[j] );
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Wait for the last buffer
+    out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        return error;
+    }
+    //Verify data
+    uint32_t *t = (uint32_t *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+        {
+            uint32_t *q = out[k];
+            // If we aren't getting the correctly rounded result
+            if( t[j] != q[j] )
+            {
+                float test = ((float*) q)[j];
+                double correct = func.f_f( s[j] );
+                float err = Ulp_Error( test, correct );
+                float abs_error = Abs_Error( test, correct );
+                int fail = 0;
+                int use_abs_error = 0;
+                // it is possible for the output to not match the reference result but for Ulp_Error
+                // to be zero, for example -1.#QNAN vs. 1.#QNAN. In such cases there is no failure
+                if (err == 0.0f)
+                {
+                    fail = 0;
+                }
+                else if( gTestFastRelaxed )
+                {
+                    if ( strcmp(fname,"sin")==0 || strcmp(fname,"cos")==0 )
+                    {
+                        fail = ! (fabsf(abs_error) <= ulps);
+                        use_abs_error = 1;
+                    }
+                    if ( strcmp(fname, "reciprocal") == 0 )
+                    {
+                        fail = ! (fabsf(err) <= ulps);
+                    }
+                    if ( strcmp(fname, "exp") == 0 || strcmp(fname, "exp2") == 0 )
+                    {
+                        float exp_error = 3+floor(fabs(2*s[j]));
+                        fail = ! (fabsf(err) <= exp_error);
+                        ulps = exp_error;
+                    }
+                    if (strcmp(fname, "tan") == 0) {
+                        if(  !gFastRelaxedDerived )
+                        {
+                            fail = ! (fabsf(err) <= ulps);
+                        }
+                        // Else fast math derived implementation does not require ULP verification
+                    }
+                    if (strcmp(fname, "exp10") == 0)
+                    {
+                        if(  !gFastRelaxedDerived )
+                        {
+                            fail = ! (fabsf(err) <= ulps);
+                        }
+                        // Else fast math derived implementation does not require ULP verification
+                    }
+                    if ( strcmp(fname,"log") == 0 || strcmp(fname,"log2") == 0 )
+                    {
+                        if( s[j] >= 0.5 && s[j] <= 2 )
+                        {
+                            fail = ! (fabsf(abs_error) <= ulps );
+                        }
+                        else
+                        {
+                            ulps = gIsEmbedded ? job->f->float_embedded_ulps : job->f->float_ulps;
+                            fail = ! (fabsf(err) <= ulps);
+                        }
+                    }
+                    // fast-relaxed implies finite-only
+                    if( IsFloatInfinity(correct) || IsFloatNaN(correct)     ||
+                        IsFloatInfinity(s[j])    || IsFloatNaN(s[j])        ) {
+                        fail = 0;
+                        err = 0;
+                    }
+                }
+                else
+                {
+                  fail = ! (fabsf(err) <= ulps);
+                }
+                // half_sin/cos/tan are only valid between +-2**16, Inf, NaN
+                if( isRangeLimited && fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) && fabsf(s[j]) < INFINITY )
+                {
+                    if( fabsf( test ) <= half_sin_cos_tan_limit )
+                    {
+                        err = 0;
+                        fail = 0;
+                    }
+                }
+                if( fail )
+                {
+                    if( ftz )
+                    {
+                        typedef int (*CheckForSubnormal) (double,float); // If we are in fast relaxed math, we have a different calculation for the subnormal threshold.
+                        CheckForSubnormal isFloatResultSubnormalPtr;
+                        if ( gTestFastRelaxed )
+                        {
+                          isFloatResultSubnormalPtr = &IsFloatResultSubnormalAbsError;
+                        }
+                        else
+                        {
+                          isFloatResultSubnormalPtr = &IsFloatResultSubnormal;
+                        }
+                        // retry per section
+                        if( (*isFloatResultSubnormalPtr)(correct, ulps) )
+                        {
+                            fail = fail && ( test != 0.0f );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                        // retry per section
+                        if( IsFloatSubnormal( s[j] ) )
+                        {
+                            double correct2 = func.f_f( 0.0 );
+                            double correct3 = func.f_f( -0.0 );
+                            float err2;
+                            float err3;
+                            if( use_abs_error )
+                            {
+                              err2 = Abs_Error( test, correct2  );
+                              err3 = Abs_Error( test, correct3  );
+                            }
+                            else
+                            {
+                              err2 = Ulp_Error( test, correct2  );
+                              err3 = Ulp_Error( test, correct3  );
+                            }
+                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( (*isFloatResultSubnormalPtr)(correct2, ulps ) || (*isFloatResultSubnormalPtr)(correct3, ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                }
+                if( fabsf(err ) > tinfo->maxError )
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                }
+                if( fail )
+                {
+                    vlog_error( "\nERROR: %s%s: %f ulp error at %a (0x%8.8x): *%a vs. %a\n", job->f->name, sizeNames[k], err, ((float*) s)[j], ((uint32_t*) s)[j], ((float*) t)[j], test);
+                    return -1;
+                }
+            }
+        }
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount);
+        } else
+        {
+            vlog("." );
+        }
+        fflush(stdout);
+    }
+    return CL_SUCCESS;
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
+    const TestInfo *job = (const TestInfo *) data;
+    size_t  buffer_elements = job->subBufferSize;
+    size_t  buffer_size = buffer_elements * sizeof( cl_double );
+    cl_uint scale = job->scale;
+    cl_uint base = job_id * (cl_uint) job->step;
+    ThreadInfo *tinfo = job->tinfo + thread_id;
+    float   ulps = job->ulps;
+    dptr    func = job->f->dfunc;
+    cl_uint j, k;
+    cl_int error;
+    int ftz = job->ftz;
+    Force64BitFPUPrecision();
+    // start the map of the output arrays
+    cl_event e[ VECTOR_SIZE_COUNT ];
+    cl_ulong *out[ VECTOR_SIZE_COUNT ];
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
+        if( error || NULL == out[j])
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush failed\n" );
+    // Write the new values to the input array
+    cl_double *p = (cl_double*) gIn + thread_id * buffer_elements;
+    for( j = 0; j < buffer_elements; j++ )
+        p[j] = DoubleFromUInt32( base + j * scale);
+    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
+    {
+        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
+        return error;
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        //Wait for the map to finish
+        if( (error = clWaitForEvents(1, e + j) ))
+        {
+            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
+            return error;
+        }
+        if( (error = clReleaseEvent( e[j] ) ))
+        {
+            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
+            return error;
+        }
+        // Fill the result buffer with garbage, so that old results don't carry over
+        uint32_t pattern = 0xffffdead;
+        memset_pattern4(out[j], &pattern, buffer_size);
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
+        {
+            vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error );
+            return error;
+        }
+        // run the kernel
+        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
+        cl_program program = job->programs[j];
+        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
+        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
+        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error( "FAILED -- could not execute kernel\n" );
+            return error;
+        }
+    }
+    // Get that moving
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 2 failed\n" );
+    if( gSkipCorrectnessTesting )
+        return CL_SUCCESS;
+    //Calculate the correctly rounded reference result
+    cl_double *r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
+    cl_double *s = (cl_double *)p;
+    for( j = 0; j < buffer_elements; j++ )
+        r[j] = (cl_double) func.f_f( s[j] );
+    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
+    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
+    {
+        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+        if( error || NULL == out[j] )
+        {
+            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+            return error;
+        }
+    }
+    // Wait for the last buffer
+    out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
+    if( error || NULL == out[j] )
+    {
+        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
+        return error;
+    }
+    //Verify data
+    cl_ulong *t = (cl_ulong *)r;
+    for( j = 0; j < buffer_elements; j++ )
+    {
+        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+        {
+            cl_ulong *q = out[k];
+            // If we aren't getting the correctly rounded result
+            if( t[j] != q[j] )
+            {
+                cl_double test = ((cl_double*) q)[j];
+                long double correct = func.f_f( s[j] );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
+                int fail = ! (fabsf(err) <= ulps);
+                if( fail )
+                {
+                    if( ftz )
+                    {
+                        // retry per section
+                        if( IsDoubleResultSubnormal(correct, ulps) )
+                        {
+                            fail = fail && ( test != 0.0f );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                        // retry per section
+                        if( IsDoubleSubnormal( s[j] ) )
+                        {
+                            long double correct2 = func.f_f( 0.0L );
+                            long double correct3 = func.f_f( -0.0L );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
+                            if( fabsf( err2 ) < fabsf(err ) )
+                                err = err2;
+                            if( fabsf( err3 ) < fabsf(err ) )
+                                err = err3;
+                            // retry per section
+                            if( IsDoubleResultSubnormal(correct2, ulps ) || IsDoubleResultSubnormal(correct3, ulps ) )
+                            {
+                                fail = fail && ( test != 0.0f);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                }
+                if( fabsf(err ) > tinfo->maxError )
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                }
+                if( fail )
+                {
+                    vlog_error( "\nERROR: %s%s: %f ulp error at %.13la (0x%16.16llx): *%.13la vs. %.13la\n", job->f->name, sizeNames[k], err, ((cl_double*) gIn)[j], ((cl_ulong*) gIn)[j], ((cl_double*) gOut_Ref)[j], test );
+                    return -1;
+                }
+            }
+        }
+    }
+    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+    {
+        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
+        {
+            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
+            return error;
+        }
+    }
+    if( (error = clFlush(tinfo->tQueue) ))
+        vlog( "clFlush 3 failed\n" );
+    if( 0 == ( base & 0x0fffffff) )
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10zd buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, buffer_elements, job->scale, job->ulps, job->threadCount);
+        } else
+        {
+            vlog("." );
+        }
+        fflush(stdout);
+    }
+    return CL_SUCCESS;
+int TestFunc_Double_Double(const Func *f, MTdata d)
+    TestInfo    test_info;
+    cl_int      error;
+    size_t      i, j;
+    float       maxError = 0.0f;
+    double      maxErrorVal = 0.0;
+#if defined( __APPLE__ )
+    struct timeval  time_val;
+    gettimeofday( &time_val, NULL );
+    double start_time = time_val.tv_sec + 1e-6 * time_val.tv_usec;
+    double end_time;
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    // Init test_info
+    memset( &test_info, 0, sizeof( test_info ) );
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale =  1;
+    if (gWimpyMode)
+    {
+        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+        test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
+    }
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+    test_info.f = f;
+    test_info.ulps = f->double_ulps;
+    test_info.ftz = f->ftz || gForceFTZ;
+    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
+        test_info.k[i] = (cl_kernel*)malloc( array_size );
+        if( NULL == test_info.k[i] )
+        {
+            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
+            error = CL_OUT_OF_HOST_MEMORY;
+            goto exit;
+        }
+        memset( test_info.k[i], 0, array_size );
+    }
+    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
+    if( NULL == test_info.tinfo )
+    {
+        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
+        error = CL_OUT_OF_HOST_MEMORY;
+        goto exit;
+    }
+    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
+    for( i = 0; i < test_info.threadCount; i++ )
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) };
+        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if( error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+            goto exit;
+        }
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+            /* Qualcomm fix: end */
+            if( error || NULL == test_info.tinfo[i].outBuf[j] )
+            {
+                vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if( NULL == test_info.tinfo[i].tQueue || error )
+        {
+            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+            goto exit;
+        }
+    }
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
+        if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+           goto exit;
+    }
+    if( !gSkipCorrectnessTesting )
+    {
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
+        // Accumulate the arithmetic errors
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            if( test_info.tinfo[i].maxError > maxError )
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+            }
+        }
+        if( error )
+            goto exit;
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+#if defined( __APPLE__ )
+    gettimeofday( &time_val, NULL);
+    end_time = time_val.tv_sec + 1e-6 * time_val.tv_usec;
+    if( gMeasureTimes )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        if( strstr( f->name, "exp" ) )
+            for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ )
+                p[j] = (double)genrand_real1(d);
+        else if( strstr( f->name, "log" ) )
+            for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ )
+                p[j] = fabs(DoubleFromUInt32( genrand_int32(d)));
+        else
+            for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ )
+                p[j] = DoubleFromUInt32( genrand_int32(d) );
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( i = 0; i < PERF_LOOP_COUNT; i++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double current_time = SubtractTime( endTime, startTime );
+                sum += current_time;
+                if( current_time < bestTime )
+                    bestTime = current_time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ %a", maxError, maxErrorVal );
+#if defined( __APPLE__ )
+    vlog( "\t(%2.2f seconds)", end_time - start_time );
+    vlog( "\n" );
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+    {
+        clReleaseProgram(test_info.programs[i]);
+        if( test_info.k[i] )
+        {
+            for( j = 0; j < test_info.threadCount; j++ )
+                clReleaseKernel(test_info.k[i][j]);
+            free( test_info.k[i] );
+        }
+    }
+    if( test_info.tinfo )
+    {
+        for( i = 0; i < test_info.threadCount; i++ )
+        {
+            clReleaseMemObject(test_info.tinfo[i].inBuf);
+            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
+            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
+        }
+        free( test_info.tinfo );
+    }
+    return error;
diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp
deleted file mode 100644
index f6fa326..0000000
--- a/test_conformance/math_brute_force/unary_double.cpp
+++ /dev/null
@@ -1,549 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    float maxError; // max error value. Init to 0.
-    double maxErrorValue; // position of the max error value.  Init to 0.
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-typedef struct TestInfo
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-    int isRangeLimited; // 1 if the function is only to be evaluated over a
-                        // range
-    float half_sin_cos_tan_limit;
-    bool relaxedMode; // True if test is running in relaxed mode, false
-                      // otherwise.
-} TestInfo;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_double));
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-    test_info.f = f;
-    test_info.ulps = f->double_ulps;
-    test_info.ftz = f->ftz || gForceFTZ;
-    test_info.relaxedMode = relaxedMode;
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_double),
-            test_info.subBufferSize * sizeof(cl_double)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-    }
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-        // Accumulate the arithmetic errors
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-            }
-        }
-        if (error) goto exit;
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t%8.2f @ %a", maxError, maxErrorVal);
-    }
-    vlog("\n");
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-        free(test_info.tinfo);
-    }
-    return error;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_double);
-    cl_uint scale = job->scale;
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    float ulps = job->ulps;
-    dptr func = job->f->dfunc;
-    cl_int error;
-    int ftz = job->ftz;
-    Force64BitFPUPrecision();
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-    // Write the new values to the input array
-    cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-        p[j] = DoubleFromUInt32(base + j * scale);
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        return error;
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            return error;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            return error;
-        }
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            return error;
-        }
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-    // Calculate the correctly rounded reference result
-    cl_double *r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
-    cl_double *s = (cl_double *)p;
-    for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (cl_double)func.f_f(s[j]);
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Verify data
-    cl_ulong *t = (cl_ulong *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            cl_ulong *q = out[k];
-            // If we aren't getting the correctly rounded result
-            if (t[j] != q[j])
-            {
-                cl_double test = ((cl_double *)q)[j];
-                long double correct = func.f_f(s[j]);
-                float err = Bruteforce_Ulp_Error_Double(test, correct);
-                int fail = !(fabsf(err) <= ulps);
-                if (fail)
-                {
-                    if (ftz)
-                    {
-                        // retry per section
-                        if (IsDoubleResultSubnormal(correct, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-                        // retry per section
-                        if (IsDoubleSubnormal(s[j]))
-                        {
-                            long double correct2 = func.f_f(0.0L);
-                            long double correct3 = func.f_f(-0.0L);
-                            float err2 =
-                                Bruteforce_Ulp_Error_Double(test, correct2);
-                            float err3 =
-                                Bruteforce_Ulp_Error_Double(test, correct3);
-                            fail = fail
-                                && ((!(fabsf(err2) <= ulps))
-                                    && (!(fabsf(err3) <= ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            // retry per section
-                            if (IsDoubleResultSubnormal(correct2, ulps)
-                                || IsDoubleResultSubnormal(correct3, ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                }
-                if (fabsf(err) > tinfo->maxError)
-                {
-                    tinfo->maxError = fabsf(err);
-                    tinfo->maxErrorValue = s[j];
-                }
-                if (fail)
-                {
-                    vlog_error("\nERROR: %s%s: %f ulp error at %.13la "
-                               "(0x%16.16llx): *%.13la vs. %.13la\n",
-                               job->f->name, sizeNames[k], err,
-                               ((cl_double *)gIn)[j], ((cl_ulong *)gIn)[j],
-                               ((cl_double *)gOut_Ref)[j], test);
-                    return -1;
-                }
-            }
-        }
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10zd buf_elements:%10u ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, buffer_elements, job->scale, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-    return CL_SUCCESS;
diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp
deleted file mode 100644
index 17edc58..0000000
--- a/test_conformance/math_brute_force/unary_float.cpp
+++ /dev/null
@@ -1,727 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
-// Thread specific data for a worker thread
-typedef struct ThreadInfo
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    float maxError; // max error value. Init to 0.
-    double maxErrorValue; // position of the max error value.  Init to 0.
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-typedef struct TestInfo
-    size_t subBufferSize; // Size of the sub-buffer in elements
-    const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
-    cl_uint threadCount; // Number of worker threads
-    cl_uint jobCount; // Number of jobs
-    cl_uint step; // step between each chunk and the next.
-    cl_uint scale; // stride between individual test values
-    float ulps; // max_allowed ulps
-    int ftz; // non-zero if running in flush to zero mode
-    int isRangeLimited; // 1 if the function is only to be evaluated over a
-                        // range
-    float half_sin_cos_tan_limit;
-    bool relaxedMode; // True if test is running in relaxed mode, false
-                      // otherwise.
-} TestInfo;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
-    TestInfo test_info;
-    cl_int error;
-    float maxError = 0.0f;
-    double maxErrorVal = 0.0;
-    int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0);
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_float));
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-    test_info.f = f;
-    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
-    test_info.ftz =
-        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    test_info.relaxedMode = relaxedMode;
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (cl_uint i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_float),
-            test_info.subBufferSize * sizeof(cl_float)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-    }
-    // Check for special cases for unary float
-    test_info.isRangeLimited = 0;
-    test_info.half_sin_cos_tan_limit = 0;
-    if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
-    {
-        test_info.isRangeLimited = 1;
-        test_info.half_sin_cos_tan_limit = 1.0f
-            + test_info.ulps
-                * (FLT_EPSILON / 2.0f); // out of range results from finite
-                                        // inputs must be in [-1,1]
-    }
-    else if (0 == strcmp(f->name, "half_tan"))
-    {
-        test_info.isRangeLimited = 1;
-        test_info.half_sin_cos_tan_limit =
-            INFINITY; // out of range resut from finite inputs must be numeric
-    }
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-    // Run the kernels
-    if (!gSkipCorrectnessTesting || skipTestingRelaxed)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-        // Accumulate the arithmetic errors
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            if (test_info.tinfo[i].maxError > maxError)
-            {
-                maxError = test_info.tinfo[i].maxError;
-                maxErrorVal = test_info.tinfo[i].maxErrorValue;
-            }
-        }
-        if (error) goto exit;
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        if (skipTestingRelaxed)
-        {
-            vlog(" (rlx skip correctness testing)\n");
-            goto exit;
-        }
-        vlog("\t%8.2f @ %a", maxError, maxErrorVal);
-    }
-    vlog("\n");
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-        free(test_info.tinfo);
-    }
-    return error;
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-    const TestInfo *job = (const TestInfo *)data;
-    size_t buffer_elements = job->subBufferSize;
-    size_t buffer_size = buffer_elements * sizeof(cl_float);
-    cl_uint scale = job->scale;
-    cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
-    fptr func = job->f->func;
-    const char *fname = job->f->name;
-    bool relaxedMode = job->relaxedMode;
-    float ulps = getAllowedUlpError(job->f, relaxedMode);
-    if (relaxedMode)
-    {
-        func = job->f->rfunc;
-    }
-    cl_int error;
-    int isRangeLimited = job->isRangeLimited;
-    float half_sin_cos_tan_limit = job->half_sin_cos_tan_limit;
-    int ftz = job->ftz;
-    // start the map of the output arrays
-    cl_event e[VECTOR_SIZE_COUNT];
-    cl_uint *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
-    // Write the new values to the input array
-    cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        p[j] = base + j * scale;
-        if (relaxedMode)
-        {
-            float p_j = *(float *)&p[j];
-            if (strcmp(fname, "sin") == 0
-                || strcmp(fname, "cos")
-                    == 0) // the domain of the function is [-pi,pi]
-            {
-                if (fabs(p_j) > M_PI) ((float *)p)[j] = NAN;
-            }
-            if (strcmp(fname, "reciprocal") == 0)
-            {
-                const float l_limit = HEX_FLT(+, 1, 0, -, 126);
-                const float u_limit = HEX_FLT(+, 1, 0, +, 126);
-                if (fabs(p_j) < l_limit
-                    || fabs(p_j) > u_limit) // the domain of the function is
-                                            // [2^-126,2^126]
-                    ((float *)p)[j] = NAN;
-            }
-        }
-    }
-    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
-                                      buffer_size, p, 0, NULL, NULL)))
-    {
-        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        return error;
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            return error;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            return error;
-        }
-        // Fill the result buffer with garbage, so that old results don't carry
-        // over
-        uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
-            return error;
-        }
-        // run the kernel
-        size_t vectorCount =
-            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
-        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
-                                                 // own copy of the cl_kernel
-        cl_program program = job->programs[j];
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
-                                            &vectorCount, NULL, 0, NULL, NULL)))
-        {
-            vlog_error("FAILED -- could not execute kernel\n");
-            return error;
-        }
-    }
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
-    if (gSkipCorrectnessTesting) return CL_SUCCESS;
-    // Calculate the correctly rounded reference result
-    float *r = (float *)gOut_Ref + thread_id * buffer_elements;
-    float *s = (float *)p;
-    for (size_t j = 0; j < buffer_elements; j++) r[j] = (float)func.f_f(s[j]);
-    // Read the data back -- no need to wait for the first N-1 buffers but wait
-    // for the last buffer. This is an in order queue.
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
-            buffer_size, 0, NULL, NULL, &error);
-        if (error || NULL == out[j])
-        {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
-        }
-    }
-    // Verify data
-    uint32_t *t = (uint32_t *)r;
-    for (size_t j = 0; j < buffer_elements; j++)
-    {
-        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-        {
-            uint32_t *q = out[k];
-            // If we aren't getting the correctly rounded result
-            if (t[j] != q[j])
-            {
-                float test = ((float *)q)[j];
-                double correct = func.f_f(s[j]);
-                float err = Ulp_Error(test, correct);
-                float abs_error = Abs_Error(test, correct);
-                int fail = 0;
-                int use_abs_error = 0;
-                // it is possible for the output to not match the reference
-                // result but for Ulp_Error to be zero, for example -1.#QNAN
-                // vs. 1.#QNAN. In such cases there is no failure
-                if (err == 0.0f)
-                {
-                    fail = 0;
-                }
-                else if (relaxedMode)
-                {
-                    if (strcmp(fname, "sin") == 0 || strcmp(fname, "cos") == 0)
-                    {
-                        fail = !(fabsf(abs_error) <= ulps);
-                        use_abs_error = 1;
-                    }
-                    if (strcmp(fname, "sinpi") == 0
-                        || strcmp(fname, "cospi") == 0)
-                    {
-                        if (s[j] >= -1.0 && s[j] <= 1.0)
-                        {
-                            fail = !(fabsf(abs_error) <= ulps);
-                            use_abs_error = 1;
-                        }
-                    }
-                    if (strcmp(fname, "reciprocal") == 0)
-                    {
-                        fail = !(fabsf(err) <= ulps);
-                    }
-                    if (strcmp(fname, "exp") == 0 || strcmp(fname, "exp2") == 0)
-                    {
-                        float exp_error = ulps;
-                        if (!gIsEmbedded)
-                        {
-                            exp_error += floor(fabs(2 * s[j]));
-                        }
-                        fail = !(fabsf(err) <= exp_error);
-                        ulps = exp_error;
-                    }
-                    if (strcmp(fname, "tan") == 0)
-                    {
-                        if (!gFastRelaxedDerived)
-                        {
-                            fail = !(fabsf(err) <= ulps);
-                        }
-                        // Else fast math derived implementation does not
-                        // require ULP verification
-                    }
-                    if (strcmp(fname, "exp10") == 0)
-                    {
-                        if (!gFastRelaxedDerived)
-                        {
-                            fail = !(fabsf(err) <= ulps);
-                        }
-                        // Else fast math derived implementation does not
-                        // require ULP verification
-                    }
-                    if (strcmp(fname, "log") == 0 || strcmp(fname, "log2") == 0
-                        || strcmp(fname, "log10") == 0)
-                    {
-                        if (s[j] >= 0.5 && s[j] <= 2)
-                        {
-                            fail = !(fabsf(abs_error) <= ulps);
-                        }
-                        else
-                        {
-                            ulps = gIsEmbedded ? job->f->float_embedded_ulps
-                                               : job->f->float_ulps;
-                            fail = !(fabsf(err) <= ulps);
-                        }
-                    }
-                    // fast-relaxed implies finite-only
-                    if (IsFloatInfinity(correct) || IsFloatNaN(correct)
-                        || IsFloatInfinity(s[j]) || IsFloatNaN(s[j]))
-                    {
-                        fail = 0;
-                        err = 0;
-                    }
-                }
-                else
-                {
-                    fail = !(fabsf(err) <= ulps);
-                }
-                // half_sin/cos/tan are only valid between +-2**16, Inf, NaN
-                if (isRangeLimited
-                    && fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16)
-                    && fabsf(s[j]) < INFINITY)
-                {
-                    if (fabsf(test) <= half_sin_cos_tan_limit)
-                    {
-                        err = 0;
-                        fail = 0;
-                    }
-                }
-                if (fail)
-                {
-                    if (ftz)
-                    {
-                        typedef int (*CheckForSubnormal)(
-                            double, float); // If we are in fast relaxed math,
-                                            // we have a different calculation
-                                            // for the subnormal threshold.
-                        CheckForSubnormal isFloatResultSubnormalPtr;
-                        if (relaxedMode)
-                        {
-                            isFloatResultSubnormalPtr =
-                                &IsFloatResultSubnormalAbsError;
-                        }
-                        else
-                        {
-                            isFloatResultSubnormalPtr = &IsFloatResultSubnormal;
-                        }
-                        // retry per section
-                        if ((*isFloatResultSubnormalPtr)(correct, ulps))
-                        {
-                            fail = fail && (test != 0.0f);
-                            if (!fail) err = 0.0f;
-                        }
-                        // retry per section
-                        if (IsFloatSubnormal(s[j]))
-                        {
-                            double correct2 = func.f_f(0.0);
-                            double correct3 = func.f_f(-0.0);
-                            float err2;
-                            float err3;
-                            if (use_abs_error)
-                            {
-                                err2 = Abs_Error(test, correct2);
-                                err3 = Abs_Error(test, correct3);
-                            }
-                            else
-                            {
-                                err2 = Ulp_Error(test, correct2);
-                                err3 = Ulp_Error(test, correct3);
-                            }
-                            fail = fail
-                                && ((!(fabsf(err2) <= ulps))
-                                    && (!(fabsf(err3) <= ulps)));
-                            if (fabsf(err2) < fabsf(err)) err = err2;
-                            if (fabsf(err3) < fabsf(err)) err = err3;
-                            // retry per section
-                            if ((*isFloatResultSubnormalPtr)(correct2, ulps)
-                                || (*isFloatResultSubnormalPtr)(correct3, ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                }
-                if (fabsf(err) > tinfo->maxError)
-                {
-                    tinfo->maxError = fabsf(err);
-                    tinfo->maxErrorValue = s[j];
-                }
-                if (fail)
-                {
-                    vlog_error("\nERROR: %s%s: %f ulp error at %a (0x%8.8x): "
-                               "*%a vs. %a\n",
-                               job->f->name, sizeNames[k], err, ((float *)s)[j],
-                               ((uint32_t *)s)[j], ((float *)t)[j], test);
-                    return -1;
-                }
-            }
-        }
-    }
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-    {
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
-        {
-            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
-                       j, error);
-            return error;
-        }
-    }
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
-    if (0 == (base & 0x0fffffff))
-    {
-        if (gVerboseBruteForce)
-        {
-            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f "
-                 "ThreadCount:%2u\n",
-                 base, job->step, job->scale, buffer_elements, job->ulps,
-                 job->threadCount);
-        }
-        else
-        {
-            vlog(".");
-        }
-        fflush(stdout);
-    }
-    return CL_SUCCESS;
diff --git a/test_conformance/math_brute_force/unary_two_results.cpp b/test_conformance/math_brute_force/unary_two_results.cpp
new file mode 100644
index 0000000..a219741
--- /dev/null
+++ b/test_conformance/math_brute_force/unary_two_results.cpp
@@ -0,0 +1,992 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "Utility.h"
+#include <string.h>
+#include "FunctionList.h"
+int TestFunc_Float2_Float(const Func *f, MTdata);
+int TestFunc_Double2_Double(const Func *f, MTdata);
+extern const vtbl _unary_two_results = { "unary_two_results",
+                                         TestFunc_Float2_Float,
+                                         TestFunc_Double2_Double };
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+    const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i], out2 + i );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* out2, __global float* in)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       float3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       float3 iout = NAN;\n"
+                            "       f0 = ", name, "( f0, &iout );\n"
+                            "       vstore3( f0, 0, out + 3*i );\n"
+                            "       vstore3( iout, 0, out2 + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       float3 iout = NAN;\n"
+                            "       float3 f0;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       f0 = ", name, "( f0, &iout );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = f0.y; \n"
+                            "               out2[3*i+1] = iout.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = f0.x; \n"
+                            "               out2[3*i] = iout.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p);
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i], out2 + i );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* out2, __global double* in)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       double3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       double3 iout = NAN;\n"
+                            "       f0 = ", name, "( f0, &iout );\n"
+                            "       vstore3( f0, 0, out + 3*i );\n"
+                            "       vstore3( iout, 0, out2 + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       double3 iout = NAN;\n"
+                            "       double3 f0;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       f0 = ", name, "( f0, &iout );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = f0.y; \n"
+                            "               out2[3*i+1] = iout.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = f0.x; \n"
+                            "               out2[3*i] = iout.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p);
+typedef struct BuildKernelInfo
+    cl_uint     offset;            // the first vector size to build
+    cl_kernel   *kernels;
+    cl_program  *programs;
+    const char  *nameInCode;
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
+    BuildKernelInfo *info = (BuildKernelInfo*) p;
+    cl_uint i = info->offset + job_id;
+    return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i );
+int TestFunc_Float2_Float(const Func *f, MTdata d)
+    uint64_t i;
+    uint32_t j, k;
+    uint32_t l;
+    int error;
+    char const * testing_mode;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError0 = 0.0f;
+    float maxError1 = 0.0f;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    float maxErrorVal0 = 0.0f;
+    float maxErrorVal1 = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( float );
+    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
+    cl_uchar overflow[BUFFER_SIZE / sizeof( float )];
+    int isFract = 0 == strcmp( "fract", f->nameInCode );
+    int skipNanInf = isFract  && ! gInfNanSupport;
+    float float_ulps;
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    if( gIsEmbedded )
+        float_ulps = f->float_embedded_ulps;
+    else
+        float_ulps = f->float_ulps;
+    if (gTestFastRelaxed)
+      float_ulps = f->relaxed_error;
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+        return error;
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        if( gWimpyMode )
+        {
+          for( j = 0; j < bufferSize / sizeof( float ); j++ )
+          {
+            p[j] = (uint32_t) i + j * scale;
+            if ( gTestFastRelaxed && strcmp(f->name,"sincos") == 0 )
+            {
+              float pj = *(float *)&p[j];
+              if(fabs(pj) > M_PI)
+                p[j] = NAN;
+            }
+          }
+        }
+        else
+        {
+          for( j = 0; j < bufferSize / sizeof( float ); j++ )
+          {
+            p[j] = (uint32_t) i + j;
+            if ( gTestFastRelaxed && strcmp(f->name,"sincos") == 0 )
+            {
+              float pj = *(float *)&p[j];
+              if(fabs(pj) > M_PI)
+                p[j] = NAN;
+            }
+          }
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+            memset_pattern4(gOut2[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL)))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg(kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+        // Flush the queue so the device can start executing while the host computes the reference results
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+        FPU_mode_type oldMode;
+        RoundingMode oldRoundMode = kRoundToNearestEven;
+        if( isFract )
+        {
+            // Prepare the host floating-point state (flush-to-zero, rounding mode) used for the fract reference
+            memset( &oldMode, 0, sizeof( oldMode ) );
+            if( ftz )
+                ForceFTZ( &oldMode );
+            // Set the rounding mode to match the device
+            if (gIsInRTZMode)
+                oldRoundMode = set_round(kRoundTowardZero, kfloat);
+        }
+        //Calculate the correctly rounded reference result
+        float *r = (float *)gOut_Ref;
+        float *r2 = (float *)gOut_Ref2;
+        float *s = (float *)gIn;
+        if( skipNanInf )
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            {
+                double dd;
+                feclearexcept(FE_OVERFLOW);
+                if( gTestFastRelaxed )
+                    r[j] = (float) f->rfunc.f_fpf( s[j], &dd );
+                else
+                    r[j] = (float) f->func.f_fpf( s[j], &dd );
+                r2[j] = (float) dd;
+                overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+            }
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            {
+                double dd;
+                if( gTestFastRelaxed )
+                  r[j] = (float) f->rfunc.f_fpf( s[j], &dd );
+                else
+                  r[j] = (float) f->func.f_fpf( s[j], &dd );
+                r2[j] = (float) dd;
+            }
+        }
+        if( isFract && ftz )
+            RestoreFPState( &oldMode );
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray2 failed %d\n", error );
+                goto exit;
+            }
+        }
+        if( gSkipCorrectnessTesting )
+        {
+            if (isFract && gIsInRTZMode)
+                (void)set_round(oldRoundMode, kfloat);
+            break;
+        }
+        //Verify data
+        uint32_t *t = (uint32_t *)gOut_Ref;
+        uint32_t *t2 = (uint32_t *)gOut_Ref2;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = (uint32_t *)gOut[k];
+                uint32_t *q2 = (uint32_t *)gOut2[k];
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] || t2[j] != q2[j]  )
+                {
+                    double correct, correct2;
+                    float err, err2;
+                    float test = ((float*) q)[j];
+                    float test2 = ((float*) q2)[j];
+                    if( gTestFastRelaxed )
+                      correct = f->rfunc.f_fpf( s[j], &correct2 );
+                    else
+                      correct = f->func.f_fpf( s[j], &correct2 );
+                    // Per section 10 paragraph 6, accept any result if an input or output is an infinity or NaN, or if the reference overflowed
+                    if (gTestFastRelaxed || skipNanInf)
+                    {
+                        if (skipNanInf && overflow[j])
+                            continue;
+                        // Note: no double rounding here.  Reference functions calculate in single precision.
+                        if( IsFloatInfinity(correct) || IsFloatNaN(correct)     ||
+                            IsFloatInfinity(correct2)|| IsFloatNaN(correct2)    ||
+                            IsFloatInfinity(s[j])    || IsFloatNaN(s[j])        )
+                            continue;
+                    }
+                    typedef int (*CheckForSubnormal) (double,float); // If we are in fast relaxed math, we have a different calculation for the subnormal threshold.
+                    CheckForSubnormal isFloatResultSubnormalPtr;
+                    if( gTestFastRelaxed )
+                    {
+                      err = Abs_Error( test, correct);
+                      err2 = Abs_Error( test2, correct2);
+                      isFloatResultSubnormalPtr = &IsFloatResultSubnormalAbsError;
+                    }
+                    else
+                    {
+                        err = Ulp_Error( test, correct );
+                        err2 = Ulp_Error( test2, correct2 );
+                        isFloatResultSubnormalPtr = &IsFloatResultSubnormal;
+                    }
+                    int fail = ! (fabsf(err) <= float_ulps && fabsf(err2) <= float_ulps);
+                    if( ftz )
+                    {
+                        // retry per the FTZ rules: a subnormal reference result may be flushed to zero
+                        if( (*isFloatResultSubnormalPtr)(correct, float_ulps) )
+                        {
+                            if( (*isFloatResultSubnormalPtr) (correct2, float_ulps ))
+                            {
+                                fail = fail && ! ( test == 0.0f && test2 == 0.0f );
+                                if( ! fail )
+                                {
+                                    err = 0.0f;
+                                    err2 = 0.0f;
+                                }
+                            }
+                            else
+                            {
+                                fail = fail && ! ( test == 0.0f && fabsf(err2) <= float_ulps);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                        else if( (*isFloatResultSubnormalPtr)(correct2, float_ulps ) )
+                        {
+                            fail = fail && ! ( test2 == 0.0f && fabsf(err) <= float_ulps);
+                            if( ! fail )
+                                err2 = 0.0f;
+                        }
+                        // retry per the FTZ rules: a subnormal input may be treated as +0.0 or -0.0
+                        if( IsFloatSubnormal( s[j] ) )
+                        {
+                            double correctp, correctn;
+                            double correct2p, correct2n;
+                            float errp, err2p, errn, err2n;
+                            if( skipNanInf )
+                                feclearexcept(FE_OVERFLOW);
+                            if ( gTestFastRelaxed )
+                            {
+                              correctp = f->rfunc.f_fpf( 0.0, &correct2p );
+                              correctn = f->rfunc.f_fpf( -0.0, &correct2n );
+                            }
+                            else
+                            {
+                              correctp = f->func.f_fpf( 0.0, &correct2p );
+                              correctn = f->func.f_fpf( -0.0, &correct2n );
+                            }
+                            // Per section 10 paragraph 6, accept any result if an input or output is an infinity or NaN, or if the reference overflowed
+                            if( skipNanInf )
+                            {
+                                if( fetestexcept(FE_OVERFLOW) )
+                                    continue;
+                                // Note: no double rounding here.  Reference functions calculate in single precision.
+                                if( IsFloatInfinity(correctp) || IsFloatNaN(correctp)   ||
+                                    IsFloatInfinity(correctn) || IsFloatNaN(correctn)   ||
+                                    IsFloatInfinity(correct2p) || IsFloatNaN(correct2p) ||
+                                    IsFloatInfinity(correct2n) || IsFloatNaN(correct2n) )
+                                    continue;
+                            }
+                            if ( gTestFastRelaxed )
+                            {
+                              errp = Abs_Error( test, correctp  );
+                              err2p = Abs_Error( test, correct2p  );
+                              errn = Abs_Error( test, correctn  );
+                              err2n = Abs_Error( test, correct2n  );
+                            }
+                            else
+                            {
+                              errp = Ulp_Error( test, correctp  );
+                              err2p = Ulp_Error( test, correct2p  );
+                              errn = Ulp_Error( test, correctn  );
+                              err2n = Ulp_Error( test, correct2n  );
+                            }
+                            fail =  fail && ((!(fabsf(errp) <= float_ulps)) && (!(fabsf(err2p) <= float_ulps))    &&
+                                            ((!(fabsf(errn) <= float_ulps)) && (!(fabsf(err2n) <= float_ulps))) );
+                            if( fabsf( errp ) < fabsf(err ) )
+                                err = errp;
+                            if( fabsf( errn ) < fabsf(err ) )
+                                err = errn;
+                            if( fabsf( err2p ) < fabsf(err2 ) )
+                                err2 = err2p;
+                            if( fabsf( err2n ) < fabsf(err2 ) )
+                                err2 = err2n;
+                            // retry per the FTZ rules: subnormal input whose zero-input reference result is itself subnormal
+                            if(  (*isFloatResultSubnormalPtr)( correctp, float_ulps ) || (*isFloatResultSubnormalPtr)( correctn, float_ulps )  )
+                            {
+                              if( (*isFloatResultSubnormalPtr)( correct2p, float_ulps ) || (*isFloatResultSubnormalPtr)( correct2n, float_ulps ) )
+                              {
+                                fail = fail && !( test == 0.0f && test2 == 0.0f);
+                                if( ! fail )
+                                  err = err2 = 0.0f;
+                              }
+                              else
+                              {
+                                fail = fail && ! (test == 0.0f && fabsf(err2) <= float_ulps);
+                                if( ! fail )
+                                  err = 0.0f;
+                              }
+                            }
+                            else if( (*isFloatResultSubnormalPtr)( correct2p, float_ulps ) || (*isFloatResultSubnormalPtr)( correct2n, float_ulps ) )
+                            {
+                                fail = fail && ! (test2 == 0.0f && (fabsf(err) <= float_ulps));
+                                if( ! fail )
+                                    err2 = 0.0f;
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError0 )
+                    {
+                        maxError0 = fabsf(err);
+                        maxErrorVal0 = s[j];
+                    }
+                    if( fabsf(err2 ) > maxError1 )
+                    {
+                        maxError1 = fabsf(err2);
+                        maxErrorVal1 = s[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %s%s: {%f, %f} ulp error at %a: *{%a, %a} vs. {%a, %a}\n", f->name, sizeNames[k], err, err2, ((float*) gIn)[j], ((float*) gOut_Ref)[j], ((float*) gOut_Ref2)[j], test, test2 );
+                      error = -1;
+                      goto exit;
+                    }
+                }
+            }
+        }
+        if (isFract && gIsInRTZMode)
+            (void)set_round(oldRoundMode, kfloat);
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+           } else
+           {
+              vlog(".");
+           }
+           fflush(stdout);
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            p[j] = genrand_int32(d);
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j]) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0, maxErrorVal1 );
+    vlog( "\n" );
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+    return error;
+int TestFunc_Double2_Double(const Func *f, MTdata d)
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError0 = 0.0f;
+    float maxError1 = 0.0f;
+    int ftz = f->ftz || gForceFTZ;
+    double maxErrorVal0 = 0.0f;
+    double maxErrorVal1 = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( cl_double );
+    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1);
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    Force64BitFPUPrecision();
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_DoubleFn,
+                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                &build_info ) ))
+    {
+        return error;
+    }
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        if( gWimpyMode )
+        {
+            for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+                p[j] = DoubleFromUInt32((uint32_t) i + j * scale);
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+                p[j] = DoubleFromUInt32((uint32_t) i + j);
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+            memset_pattern4(gOut2[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL)))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg(kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+        //Calculate the correctly rounded reference result
+        double *r = (double *)gOut_Ref;
+        double *r2 = (double *)gOut_Ref2;
+        double *s = (double *)gIn;
+        for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+        {
+            long double dd;
+            r[j] = (double) f->dfunc.f_fpf( s[j], &dd );
+            r2[j] = (double) dd;
+        }
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray2 failed %d\n", error );
+                goto exit;
+            }
+        }
+        if( gSkipCorrectnessTesting )
+            break;
+        //Verify data
+        uint64_t *t = (uint64_t *)gOut_Ref;
+        uint64_t *t2 = (uint64_t *)gOut_Ref2;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint64_t *q = (uint64_t *)(gOut[k]);
+                uint64_t *q2 = (uint64_t *)(gOut2[k]);
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] || t2[j] != q2[j]  )
+                {
+                    double test = ((double*) q)[j];
+                    double test2 = ((double*) q2)[j];
+                    long double correct2;
+                    long double correct = f->dfunc.f_fpf( s[j], &correct2 );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
+                    float err2 = Bruteforce_Ulp_Error_Double( test2, correct2 );
+                    int fail = ! (fabsf(err) <= f->double_ulps && fabsf(err2) <= f->double_ulps);
+                    if( ftz )
+                    {
+                        // retry per section
+                        if( IsDoubleResultSubnormal(correct, f->double_ulps ) )
+                        {
+                            if( IsDoubleResultSubnormal( correct2, f->double_ulps ) )
+                            {
+                                fail = fail && ! ( test == 0.0f && test2 == 0.0f );
+                                if( ! fail )
+                                {
+                                    err = 0.0f;
+                                    err2 = 0.0f;
+                                }
+                            }
+                            else
+                            {
+                                fail = fail && ! ( test == 0.0f && fabsf(err2) <= f->double_ulps);
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                        else if( IsDoubleResultSubnormal( correct2, f->double_ulps ) )
+                        {
+                            fail = fail && ! ( test2 == 0.0f && fabsf(err) <= f->double_ulps);
+                            if( ! fail )
+                                err2 = 0.0f;
+                        }
+                        // retry per section
+                        if( IsDoubleSubnormal( s[j] ) )
+                        {
+                            long double correct2p, correct2n;
+                            long double correctp = f->dfunc.f_fpf( 0.0, &correct2p );
+                            long double correctn = f->dfunc.f_fpf( -0.0, &correct2n );
+                            float errp = Bruteforce_Ulp_Error_Double( test, correctp  );
+                            float err2p = Bruteforce_Ulp_Error_Double( test, correct2p  );
+                            float errn = Bruteforce_Ulp_Error_Double( test, correctn  );
+                            float err2n = Bruteforce_Ulp_Error_Double( test, correct2n  );
+                            fail =  fail && ((!(fabsf(errp) <= f->double_ulps)) && (!(fabsf(err2p) <= f->double_ulps))    &&
+                                            ((!(fabsf(errn) <= f->double_ulps)) && (!(fabsf(err2n) <= f->double_ulps))) );
+                            if( fabsf( errp ) < fabsf(err ) )
+                                err = errp;
+                            if( fabsf( errn ) < fabsf(err ) )
+                                err = errn;
+                            if( fabsf( err2p ) < fabsf(err2 ) )
+                                err2 = err2p;
+                            if( fabsf( err2n ) < fabsf(err2 ) )
+                                err2 = err2n;
+                            // retry per section
+                            if( IsDoubleResultSubnormal( correctp, f->double_ulps ) || IsDoubleResultSubnormal( correctn, f->double_ulps ) )
+                            {
+                                if( IsDoubleResultSubnormal( correct2p, f->double_ulps ) || IsDoubleResultSubnormal( correct2n, f->double_ulps ) )
+                                {
+                                    fail = fail && !( test == 0.0f && test2 == 0.0f);
+                                    if( ! fail )
+                                        err = err2 = 0.0f;
+                                }
+                                else
+                                {
+                                    fail = fail && ! (test == 0.0f && fabsf(err2) <= f->double_ulps);
+                                    if( ! fail )
+                                        err = 0.0f;
+                                }
+                            }
+                            else if( IsDoubleResultSubnormal( correct2p, f->double_ulps ) || IsDoubleResultSubnormal( correct2n, f->double_ulps ) )
+                            {
+                                fail = fail && ! (test2 == 0.0f && (fabsf(err) <= f->double_ulps));
+                                if( ! fail )
+                                    err2 = 0.0f;
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError0 )
+                    {
+                        maxError0 = fabsf(err);
+                        maxErrorVal0 = s[j];
+                    }
+                    if( fabsf(err2 ) > maxError1 )
+                    {
+                        maxError1 = fabsf(err2);
+                        maxErrorVal1 = s[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %sD%s: {%f, %f} ulp error at %.13la: *{%.13la, %.13la} vs. {%.13la, %.13la}\n", f->name, sizeNames[k], err, err2, ((double*) gIn)[j], ((double*) gOut_Ref)[j], ((double*) gOut_Ref2)[j], test, test2 );
+                      error = -1;
+                      goto exit;
+                    }
+                }
+            }
+        }
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input array
+        double *p = (double*) gIn;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+            p[j] = DoubleFromUInt32(genrand_int32(d) );
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j]) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0, maxErrorVal1 );
+    vlog( "\n" );
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+    return error;
diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp
deleted file mode 100644
index 71dd4f4..0000000
--- a/test_conformance/math_brute_force/unary_two_results_double.cpp
+++ /dev/null
@@ -1,446 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* out2, __global double",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i], out2 + i );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* out2, __global double* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-        "       double3 iout = NAN;\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "       vstore3( iout, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 iout = NAN;\n"
-        "       double3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               out2[3*i+1] = iout.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               out2[3*i] = iout.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError0 = 0.0f;
-    float maxError1 = 0.0f;
-    int ftz = f->ftz || gForceFTZ;
-    double maxErrorVal0 = 0.0f;
-    double maxErrorVal1 = 0.0f;
-    uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE);
-    int scale =
-        (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(cl_double)) + 1);
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    Force64BitFPUPrecision();
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        double *p = (double *)gIn;
-        if (gWimpyMode)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-                p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-                p[j] = DoubleFromUInt32((uint32_t)i + j);
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-        // Calculate the correctly rounded reference result
-        double *r = (double *)gOut_Ref;
-        double *r2 = (double *)gOut_Ref2;
-        double *s = (double *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-        {
-            long double dd;
-            r[j] = (double)f->dfunc.f_fpf(s[j], &dd);
-            r2[j] = (double)dd;
-        }
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
-            }
-        }
-        if (gSkipCorrectnessTesting) break;
-        // Verify data
-        uint64_t *t = (uint64_t *)gOut_Ref;
-        uint64_t *t2 = (uint64_t *)gOut_Ref2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint64_t *q = (uint64_t *)(gOut[k]);
-                uint64_t *q2 = (uint64_t *)(gOut2[k]);
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j] || t2[j] != q2[j])
-                {
-                    double test = ((double *)q)[j];
-                    double test2 = ((double *)q2)[j];
-                    long double correct2;
-                    long double correct = f->dfunc.f_fpf(s[j], &correct2);
-                    float err = Bruteforce_Ulp_Error_Double(test, correct);
-                    float err2 = Bruteforce_Ulp_Error_Double(test2, correct2);
-                    int fail = !(fabsf(err) <= f->double_ulps
-                                 && fabsf(err2) <= f->double_ulps);
-                    if (ftz)
-                    {
-                        // retry per section
-                        if (IsDoubleResultSubnormal(correct, f->double_ulps))
-                        {
-                            if (IsDoubleResultSubnormal(correct2,
-                                                        f->double_ulps))
-                            {
-                                fail = fail && !(test == 0.0f && test2 == 0.0f);
-                                if (!fail)
-                                {
-                                    err = 0.0f;
-                                    err2 = 0.0f;
-                                }
-                            }
-                            else
-                            {
-                                fail = fail
-                                    && !(test == 0.0f
-                                         && fabsf(err2) <= f->double_ulps);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                        else if (IsDoubleResultSubnormal(correct2,
-                                                         f->double_ulps))
-                        {
-                            fail = fail
-                                && !(test2 == 0.0f
-                                     && fabsf(err) <= f->double_ulps);
-                            if (!fail) err2 = 0.0f;
-                        }
-                        // retry per section
-                        if (IsDoubleSubnormal(s[j]))
-                        {
-                            long double correct2p, correct2n;
-                            long double correctp =
-                                f->dfunc.f_fpf(0.0, &correct2p);
-                            long double correctn =
-                                f->dfunc.f_fpf(-0.0, &correct2n);
-                            float errp =
-                                Bruteforce_Ulp_Error_Double(test, correctp);
-                            float err2p =
-                                Bruteforce_Ulp_Error_Double(test, correct2p);
-                            float errn =
-                                Bruteforce_Ulp_Error_Double(test, correctn);
-                            float err2n =
-                                Bruteforce_Ulp_Error_Double(test, correct2n);
-                            fail = fail
-                                && ((!(fabsf(errp) <= f->double_ulps))
-                                    && (!(fabsf(err2p) <= f->double_ulps))
-                                    && ((!(fabsf(errn) <= f->double_ulps))
-                                        && (!(fabsf(err2n)
-                                              <= f->double_ulps))));
-                            if (fabsf(errp) < fabsf(err)) err = errp;
-                            if (fabsf(errn) < fabsf(err)) err = errn;
-                            if (fabsf(err2p) < fabsf(err2)) err2 = err2p;
-                            if (fabsf(err2n) < fabsf(err2)) err2 = err2n;
-                            // retry per section
-                            if (IsDoubleResultSubnormal(correctp,
-                                                        f->double_ulps)
-                                || IsDoubleResultSubnormal(correctn,
-                                                           f->double_ulps))
-                            {
-                                if (IsDoubleResultSubnormal(correct2p,
-                                                            f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct2n,
-                                                               f->double_ulps))
-                                {
-                                    fail = fail
-                                        && !(test == 0.0f && test2 == 0.0f);
-                                    if (!fail) err = err2 = 0.0f;
-                                }
-                                else
-                                {
-                                    fail = fail
-                                        && !(test == 0.0f
-                                             && fabsf(err2) <= f->double_ulps);
-                                    if (!fail) err = 0.0f;
-                                }
-                            }
-                            else if (IsDoubleResultSubnormal(correct2p,
-                                                             f->double_ulps)
-                                     || IsDoubleResultSubnormal(correct2n,
-                                                                f->double_ulps))
-                            {
-                                fail = fail
-                                    && !(test2 == 0.0f
-                                         && (fabsf(err) <= f->double_ulps));
-                                if (!fail) err2 = 0.0f;
-                            }
-                        }
-                    }
-                    if (fabsf(err) > maxError0)
-                    {
-                        maxError0 = fabsf(err);
-                        maxErrorVal0 = s[j];
-                    }
-                    if (fabsf(err2) > maxError1)
-                    {
-                        maxError1 = fabsf(err2);
-                        maxErrorVal1 = s[j];
-                    }
-                    if (fail)
-                    {
-                        vlog_error(
-                            "\nERROR: %sD%s: {%f, %f} ulp error at %.13la: "
-                            "*{%.13la, %.13la} vs. {%.13la, %.13la}\n",
-                            f->name, sizeNames[k], err, err2,
-                            ((double *)gIn)[j], ((double *)gOut_Ref)[j],
-                            ((double *)gOut_Ref2)[j], test, test2);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0,
-             maxErrorVal1);
-    }
-    vlog("\n");
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp
deleted file mode 100644
index 4a375ce..0000000
--- a/test_conformance/math_brute_force/unary_two_results_float.cpp
+++ /dev/null
@@ -1,578 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* out2, __global float",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i], out2 + i );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* out2, __global float* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 iout = NAN;\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "       vstore3( iout, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 iout = NAN;\n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               out2[3*i+1] = iout.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               out2[3*i] = iout.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError0 = 0.0f;
-    float maxError1 = 0.0f;
-    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    float maxErrorVal0 = 0.0f;
-    float maxErrorVal1 = 0.0f;
-    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
-    int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(float)) + 1);
-    cl_uchar overflow[BUFFER_SIZE / sizeof(float)];
-    int isFract = 0 == strcmp("fract", f->nameInCode);
-    int skipNanInf = isFract && !gInfNanSupport;
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    float float_ulps = getAllowedUlpError(f, relaxedMode);
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        uint32_t *p = (uint32_t *)gIn;
-        if (gWimpyMode)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            {
-                p[j] = (uint32_t)i + j * scale;
-                if (relaxedMode && strcmp(f->name, "sincos") == 0)
-                {
-                    float pj = *(float *)&p[j];
-                    if (fabs(pj) > M_PI) ((float *)p)[j] = NAN;
-                }
-            }
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            {
-                p[j] = (uint32_t)i + j;
-                if (relaxedMode && strcmp(f->name, "sincos") == 0)
-                {
-                    float pj = *(float *)&p[j];
-                    if (fabs(pj) > M_PI) ((float *)p)[j] = NAN;
-                }
-            }
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-        FPU_mode_type oldMode;
-        RoundingMode oldRoundMode = kRoundToNearestEven;
-        if (isFract)
-        {
-            // Calculate the correctly rounded reference result
-            memset(&oldMode, 0, sizeof(oldMode));
-            if (ftz) ForceFTZ(&oldMode);
-            // Set the rounding mode to match the device
-            if (gIsInRTZMode)
-                oldRoundMode = set_round(kRoundTowardZero, kfloat);
-        }
-        // Calculate the correctly rounded reference result
-        float *r = (float *)gOut_Ref;
-        float *r2 = (float *)gOut_Ref2;
-        float *s = (float *)gIn;
-        if (skipNanInf)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            {
-                double dd;
-                feclearexcept(FE_OVERFLOW);
-                if (relaxedMode)
-                    r[j] = (float)f->rfunc.f_fpf(s[j], &dd);
-                else
-                    r[j] = (float)f->func.f_fpf(s[j], &dd);
-                r2[j] = (float)dd;
-                overflow[j] =
-                    FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
-            }
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            {
-                double dd;
-                if (relaxedMode)
-                    r[j] = (float)f->rfunc.f_fpf(s[j], &dd);
-                else
-                    r[j] = (float)f->func.f_fpf(s[j], &dd);
-                r2[j] = (float)dd;
-            }
-        }
-        if (isFract && ftz) RestoreFPState(&oldMode);
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
-            }
-        }
-        if (gSkipCorrectnessTesting)
-        {
-            if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
-            break;
-        }
-        // Verify data
-        uint32_t *t = (uint32_t *)gOut_Ref;
-        uint32_t *t2 = (uint32_t *)gOut_Ref2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint32_t *q = (uint32_t *)gOut[k];
-                uint32_t *q2 = (uint32_t *)gOut2[k];
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j] || t2[j] != q2[j])
-                {
-                    double correct, correct2;
-                    float err, err2;
-                    float test = ((float *)q)[j];
-                    float test2 = ((float *)q2)[j];
-                    if (relaxedMode)
-                        correct = f->rfunc.f_fpf(s[j], &correct2);
-                    else
-                        correct = f->func.f_fpf(s[j], &correct2);
-                    // Per section 10 paragraph 6, accept any result if an input
-                    // or output is a infinity or NaN or overflow
-                    if (relaxedMode || skipNanInf)
-                    {
-                        if (skipNanInf && overflow[j]) continue;
-                        // Note: no double rounding here.  Reference functions
-                        // calculate in single precision.
-                        if (IsFloatInfinity(correct) || IsFloatNaN(correct)
-                            || IsFloatInfinity(correct2) || IsFloatNaN(correct2)
-                            || IsFloatInfinity(s[j]) || IsFloatNaN(s[j]))
-                            continue;
-                    }
-                    typedef int (*CheckForSubnormal)(
-                        double, float); // If we are in fast relaxed math, we
-                                        // have a different calculation for the
-                                        // subnormal threshold.
-                    CheckForSubnormal isFloatResultSubnormalPtr;
-                    if (relaxedMode)
-                    {
-                        err = Abs_Error(test, correct);
-                        err2 = Abs_Error(test2, correct2);
-                        isFloatResultSubnormalPtr =
-                            &IsFloatResultSubnormalAbsError;
-                    }
-                    else
-                    {
-                        err = Ulp_Error(test, correct);
-                        err2 = Ulp_Error(test2, correct2);
-                        isFloatResultSubnormalPtr = &IsFloatResultSubnormal;
-                    }
-                    int fail = !(fabsf(err) <= float_ulps
-                                 && fabsf(err2) <= float_ulps);
-                    if (ftz)
-                    {
-                        // retry per section
-                        if ((*isFloatResultSubnormalPtr)(correct, float_ulps))
-                        {
-                            if ((*isFloatResultSubnormalPtr)(correct2,
-                                                             float_ulps))
-                            {
-                                fail = fail && !(test == 0.0f && test2 == 0.0f);
-                                if (!fail)
-                                {
-                                    err = 0.0f;
-                                    err2 = 0.0f;
-                                }
-                            }
-                            else
-                            {
-                                fail = fail
-                                    && !(test == 0.0f
-                                         && fabsf(err2) <= float_ulps);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                        else if ((*isFloatResultSubnormalPtr)(correct2,
-                                                              float_ulps))
-                        {
-                            fail = fail
-                                && !(test2 == 0.0f && fabsf(err) <= float_ulps);
-                            if (!fail) err2 = 0.0f;
-                        }
-                        // retry per section
-                        if (IsFloatSubnormal(s[j]))
-                        {
-                            double correctp, correctn;
-                            double correct2p, correct2n;
-                            float errp, err2p, errn, err2n;
-                            if (skipNanInf) feclearexcept(FE_OVERFLOW);
-                            if (relaxedMode)
-                            {
-                                correctp = f->rfunc.f_fpf(0.0, &correct2p);
-                                correctn = f->rfunc.f_fpf(-0.0, &correct2n);
-                            }
-                            else
-                            {
-                                correctp = f->func.f_fpf(0.0, &correct2p);
-                                correctn = f->func.f_fpf(-0.0, &correct2n);
-                            }
-                            // Per section 10 paragraph 6, accept any result if
-                            // an input or output is a infinity or NaN or
-                            // overflow
-                            if (skipNanInf)
-                            {
-                                if (fetestexcept(FE_OVERFLOW)) continue;
-                                // Note: no double rounding here.  Reference
-                                // functions calculate in single precision.
-                                if (IsFloatInfinity(correctp)
-                                    || IsFloatNaN(correctp)
-                                    || IsFloatInfinity(correctn)
-                                    || IsFloatNaN(correctn)
-                                    || IsFloatInfinity(correct2p)
-                                    || IsFloatNaN(correct2p)
-                                    || IsFloatInfinity(correct2n)
-                                    || IsFloatNaN(correct2n))
-                                    continue;
-                            }
-                            if (relaxedMode)
-                            {
-                                errp = Abs_Error(test, correctp);
-                                err2p = Abs_Error(test, correct2p);
-                                errn = Abs_Error(test, correctn);
-                                err2n = Abs_Error(test, correct2n);
-                            }
-                            else
-                            {
-                                errp = Ulp_Error(test, correctp);
-                                err2p = Ulp_Error(test, correct2p);
-                                errn = Ulp_Error(test, correctn);
-                                err2n = Ulp_Error(test, correct2n);
-                            }
-                            fail = fail
-                                && ((!(fabsf(errp) <= float_ulps))
-                                    && (!(fabsf(err2p) <= float_ulps))
-                                    && ((!(fabsf(errn) <= float_ulps))
-                                        && (!(fabsf(err2n) <= float_ulps))));
-                            if (fabsf(errp) < fabsf(err)) err = errp;
-                            if (fabsf(errn) < fabsf(err)) err = errn;
-                            if (fabsf(err2p) < fabsf(err2)) err2 = err2p;
-                            if (fabsf(err2n) < fabsf(err2)) err2 = err2n;
-                            // retry per section
-                            if ((*isFloatResultSubnormalPtr)(correctp,
-                                                             float_ulps)
-                                || (*isFloatResultSubnormalPtr)(correctn,
-                                                                float_ulps))
-                            {
-                                if ((*isFloatResultSubnormalPtr)(correct2p,
-                                                                 float_ulps)
-                                    || (*isFloatResultSubnormalPtr)(correct2n,
-                                                                    float_ulps))
-                                {
-                                    fail = fail
-                                        && !(test == 0.0f && test2 == 0.0f);
-                                    if (!fail) err = err2 = 0.0f;
-                                }
-                                else
-                                {
-                                    fail = fail
-                                        && !(test == 0.0f
-                                             && fabsf(err2) <= float_ulps);
-                                    if (!fail) err = 0.0f;
-                                }
-                            }
-                            else if ((*isFloatResultSubnormalPtr)(correct2p,
-                                                                  float_ulps)
-                                     || (*isFloatResultSubnormalPtr)(
-                                         correct2n, float_ulps))
-                            {
-                                fail = fail
-                                    && !(test2 == 0.0f
-                                         && (fabsf(err) <= float_ulps));
-                                if (!fail) err2 = 0.0f;
-                            }
-                        }
-                    }
-                    if (fabsf(err) > maxError0)
-                    {
-                        maxError0 = fabsf(err);
-                        maxErrorVal0 = s[j];
-                    }
-                    if (fabsf(err2) > maxError1)
-                    {
-                        maxError1 = fabsf(err2);
-                        maxErrorVal1 = s[j];
-                    }
-                    if (fail)
-                    {
-                        vlog_error("\nERROR: %s%s: {%f, %f} ulp error at %a: "
-                                   "*{%a, %a} vs. {%a, %a}\n",
-                                   f->name, sizeNames[k], err, err2,
-                                   ((float *)gIn)[j], ((float *)gOut_Ref)[j],
-                                   ((float *)gOut_Ref2)[j], test, test2);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-        if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0,
-             maxErrorVal1);
-    }
-    vlog("\n");
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/unary_two_results_i.cpp b/test_conformance/math_brute_force/unary_two_results_i.cpp
new file mode 100644
index 0000000..f5cc1e3
--- /dev/null
+++ b/test_conformance/math_brute_force/unary_two_results_i.cpp
@@ -0,0 +1,801 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "Utility.h"
+#include <limits.h>
+#include <string.h>
+#include "FunctionList.h"
+int TestFunc_FloatI_Float(const Func *f, MTdata);
+int TestFunc_DoubleI_Double(const Func *f, MTdata);
+extern const vtbl _unary_two_results_i = { "unary_two_results_i",
+                                           TestFunc_FloatI_Float,
+                                           TestFunc_DoubleI_Double };
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p )  // Assembles the OpenCL C source for "math_kernel<size>", which calls the float builtin `name` with an extra int pointer result, and returns whatever MakeKernel returns. vectorSize indexes sizeNames/sizeValues; k and p are forwarded to MakeKernel untouched.
+    const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], "* in)\n"
+                            "{\n"  // c: generic source; each work-item processes one element of the type named by sizeNames[vectorSize]
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i], out2 + i );\n"  // the int result is written through out2 + i
+                            "}\n"
+                        };
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global int* out2, __global float* in)\n"
+                            "{\n"  // c3: variant chosen below when sizeValues[vectorSize] == 3; takes scalar pointers and uses vload3/vstore3
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"  // every work-item except the last one handles a full 3-element group
+                            "   {\n"
+                            "       float3 f0 = vload3( 0, in + 3 * i );\n"
+                            "       int3 iout = INT_MIN;\n"  // iout is preset to INT_MIN before the call
+                            "       f0 = ", name, "( f0, &iout );\n"
+                            "       vstore3( f0, 0, out + 3*i );\n"
+                            "       vstore3( iout, 0, out2 + 3*i );\n"
+                            "   }\n"
+                            "   else\n"  // last work-item: reads only one or two inputs and pads the remaining lanes with NAN
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       int3 iout = INT_MIN;\n"
+                            "       float3 f0;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"  // odd index: one input element is read
+                            "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
+                            "               break;\n"
+                            "           case 0:\n"  // even index: two input elements are read
+                            "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       f0 = ", name, "( f0, &iout );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"  // stores the .y lane, then falls through to store the .x lane as well
+                            "               out[3*i+1] = f0.y; \n"
+                            "               out2[3*i+1] = iout.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = f0.x; \n"
+                            "               out2[3*i] = iout.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    const char **kern = c;  // default to the generic source
+    size_t kernSize = sizeof(c)/sizeof(c[0]);  // number of string fragments handed to MakeKernel
+    if( sizeValues[vectorSize] == 3 )  // 3-element vectors use the vload3/vstore3 variant instead
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );  // same "math_kernel<size>" name that the source fragments above spell out
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p);  // MakeKernel is defined elsewhere; presumably it builds the program and fills *k and *p (confirm there)
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p )  // Double-precision counterpart of BuildKernel: same kernel layout, but the source starts with the cl_khr_fp64 pragma and uses double/double3 for input and first output. Returns whatever MakeKernel returns.
+    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], "* in)\n"
+                            "{\n"  // c: generic source; each work-item processes one element of the type named by sizeNames[vectorSize]
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i], out2 + i );\n"  // the int result is written through out2 + i
+                            "}\n"
+                        };
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global int* out2, __global double* in)\n"
+                        "{\n"  // c3: variant chosen below when sizeValues[vectorSize] == 3; takes scalar pointers and uses vload3/vstore3
+                        "   size_t i = get_global_id(0);\n"
+                        "   if( i + 1 < get_global_size(0) )\n"  // every work-item except the last one handles a full 3-element group
+                        "   {\n"
+                        "       double3 f0 = vload3( 0, in + 3 * i );\n"
+                        "       int3 iout = INT_MIN;\n"  // iout is preset to INT_MIN before the call
+                        "       f0 = ", name, "( f0, &iout );\n"
+                        "       vstore3( f0, 0, out + 3*i );\n"
+                        "       vstore3( iout, 0, out2 + 3*i );\n"
+                        "   }\n"
+                        "   else\n"  // last work-item: reads only one or two inputs and pads the remaining lanes with NAN
+                        "   {\n"
+                        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                        "       int3 iout = INT_MIN;\n"
+                        "       double3 f0;\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 1:\n"  // odd index: one input element is read
+                        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
+                        "               break;\n"
+                        "           case 0:\n"  // even index: two input elements are read
+                        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
+                        "               break;\n"
+                        "       }\n"
+                        "       f0 = ", name, "( f0, &iout );\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 0:\n"  // stores the .y lane, then falls through to store the .x lane as well
+                        "               out[3*i+1] = f0.y; \n"
+                        "               out2[3*i+1] = iout.y; \n"
+                        "               // fall through\n"
+                        "           case 1:\n"
+                        "               out[3*i] = f0.x; \n"
+                        "               out2[3*i] = iout.x; \n"
+                        "               break;\n"
+                        "       }\n"
+                        "   }\n"
+                        "}\n"
+                    };
+    const char **kern = c;  // default to the generic source
+    size_t kernSize = sizeof(c)/sizeof(c[0]);  // number of string fragments handed to MakeKernel
+    if( sizeValues[vectorSize] == 3 )  // 3-element vectors use the vload3/vstore3 variant instead
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );  // same "math_kernel<size>" name that the source fragments above spell out
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p);  // MakeKernel is defined elsewhere; presumably it builds the program and fills *k and *p (confirm there)
+typedef struct BuildKernelInfo  // Argument bundle that reaches the BuildKernel_FloatFn / BuildKernel_DoubleFn callbacks through their void* parameter
+    cl_uint     offset;            // the first vector size to build; the callbacks add job_id to it to get the vector-size index
+    cl_kernel   *kernels;  // array base; the callbacks pass kernels + index on to BuildKernel/BuildKernelDouble
+    cl_program  *programs;  // array base; the callbacks pass programs + index on to BuildKernel/BuildKernelDouble
+    const char  *nameInCode;  // function name spliced into the generated kernel source
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )  // Job callback given to ThreadPool_Do by TestFunc_FloatI_Float: builds the float kernel for one vector size and returns BuildKernel's result. thread_id is not used.
+    BuildKernelInfo *info = (BuildKernelInfo*) p;  // p is the BuildKernelInfo the caller handed to ThreadPool_Do
+    cl_uint i = info->offset + job_id;  // job ids are relative to the first vector-size index to build
+    return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i );  // slot i of the caller's arrays is passed for the kernel and program
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )  // Double-precision twin of BuildKernel_FloatFn: builds the double kernel for one vector size and returns BuildKernelDouble's result. Presumably also run via ThreadPool_Do (caller not visible here). thread_id is not used.
+    BuildKernelInfo *info = (BuildKernelInfo*) p;  // p is expected to point at a BuildKernelInfo
+    cl_uint i = info->offset + job_id;  // job ids are relative to the first vector-size index to build
+    return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i );  // slot i of the caller's arrays is passed for the kernel and program
+cl_ulong  abs_cl_long( cl_long i );
+cl_ulong  abs_cl_long( cl_long i )
+    cl_long mask = i >> 63;
+    return (i ^ mask) - mask;
+int TestFunc_FloatI_Float(const Func *f, MTdata d)
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+    int64_t maxError2 = 0;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+    float maxErrorVal = 0.0f;
+    float maxErrorVal2 = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    float float_ulps;
+     uint64_t step = bufferSize / sizeof( float );
+    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1);
+    cl_ulong  maxiError;
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    if( gIsEmbedded )
+        float_ulps = f->float_embedded_ulps;
+    else
+        float_ulps = f->float_ulps;
+    maxiError = float_ulps == INFINITY ? CL_ULONG_MAX : 0;
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
+        return error;
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        if( gWimpyMode )
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = (uint32_t) i + j * scale;
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = (uint32_t) i + j;
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+            memset_pattern4(gOut2[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+        //Calculate the correctly rounded reference result
+        float *r = (float *)gOut_Ref;
+        int *r2 = (int *)gOut_Ref2;
+        float *s = (float *)gIn;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            r[j] = (float) f->func.f_fpI( s[j], r2+j );
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray2 failed %d\n", error );
+                goto exit;
+            }
+        }
+        if( gSkipCorrectnessTesting )
+            break;
+        //Verify data
+        uint32_t *t = (uint32_t *)gOut_Ref;
+        int32_t *t2 = (int32_t *)gOut_Ref2;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = (uint32_t *)(gOut[k]);
+                int32_t *q2 = (int32_t *)(gOut2[k]);
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] || t2[j] != q2[j] )
+                {
+                    float test = ((float*) q)[j];
+                    int correct2 = INT_MIN;
+                    double correct = f->func.f_fpI( s[j], &correct2 );
+                    float err = Ulp_Error( test, correct );
+                    cl_long iErr = (int64_t) q2[j] - (int64_t) correct2;
+                    int fail = ! (fabsf(err) <= float_ulps && abs_cl_long( iErr ) <= maxiError );
+                    if( ftz )
+                    {
+                        // retry per section
+                        if( IsFloatResultSubnormal(correct, float_ulps ) )
+                        {
+                            fail = fail && ! ( test == 0.0f && iErr == 0 );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                        // retry per section
+                        if( IsFloatSubnormal( s[j] ) )
+                        {
+                            int correct5, correct6;
+                            double correct3 = f->func.f_fpI( 0.0, &correct5 );
+                            double correct4 = f->func.f_fpI( -0.0, &correct6 );
+                            float err2 = Ulp_Error( test, correct3  );
+                            float err3 = Ulp_Error( test, correct4  );
+                            cl_long iErr2 = (long long) q2[j] - (long long) correct5;
+                            cl_long iErr3 = (long long) q2[j] - (long long) correct6;
+                            // Did +0 work?
+                            if( fabsf(err2) <= float_ulps && abs_cl_long( iErr2 ) <= maxiError )
+                            {
+                                err = err2;
+                                iErr = iErr2;
+                                fail = 0;
+                            }
+                            // Did -0 work?
+                            else if(fabsf(err3) <= float_ulps && abs_cl_long( iErr3 ) <= maxiError)
+                            {
+                                err = err3;
+                                iErr = iErr3;
+                                fail = 0;
+                            }
+                            // retry per section
+                            if( fail && (IsFloatResultSubnormal(correct2, float_ulps ) || IsFloatResultSubnormal(correct3, float_ulps )) )
+                            {
+                                fail = fail && ! ( test == 0.0f && (abs_cl_long( iErr2 ) <= maxiError || abs_cl_long( iErr3 ) <= maxiError) );
+                                if( ! fail )
+                                {
+                                    err = 0.0f;
+                                    iErr = 0;
+                                }
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                    }
+                    if( llabs(iErr) > maxError2 )
+                    {
+                        maxError2 = llabs(iErr );
+                        maxErrorVal2 = s[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %s%s: {%f, %d} ulp error at %a: *{%a, %d} vs. {%a, %d}\n", f->name, sizeNames[k], err, (int) iErr, ((float*) gIn)[j], ((float*) gOut_Ref)[j], ((int*) gOut_Ref2)[j], test, q2[j] );
+                        error = -1;
+                        goto exit;
+                    }
+                }
+            }
+        }
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            p[j] = genrand_int32(d);
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal );
+    vlog( "\n" );
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+    return error;
+int TestFunc_DoubleI_Double(const Func *f, MTdata d)
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+    int64_t maxError2 = 0;
+    int ftz = f->ftz || gForceFTZ;
+    double maxErrorVal = 0.0f;
+    double maxErrorVal2 = 0.0f;
+    cl_ulong  maxiError = f->double_ulps == INFINITY ? CL_ULONG_MAX : 0;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( double );
+    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1);
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512);
+    }
+    Force64BitFPUPrecision();
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_DoubleFn,
+                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                &build_info ) ))
+    {
+        return error;
+    }
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+    for( i = 0; i < (1ULL<<32); i += step )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        if( gWimpyMode )
+        {
+            for( j = 0; j < bufferSize / sizeof( double ); j++ )
+                p[j] = DoubleFromUInt32((uint32_t) i + j * scale);
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( double ); j++ )
+                p[j] = DoubleFromUInt32((uint32_t) i + j);
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+            memset_pattern4(gOut2[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL) ))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+            {
+                vlog_error( "FAILED -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" );
+        //Calculate the correctly rounded reference result
+        double *r = (double *)gOut_Ref;
+        int *r2 = (int *)gOut_Ref2;
+        double *s = (double *)gIn;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+            r[j] = (double) f->dfunc.f_fpI( s[j], r2+j );
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) )
+            {
+                vlog_error( "ReadArray2 failed %d\n", error );
+                goto exit;
+            }
+        }
+        if( gSkipCorrectnessTesting )
+            break;
+        //Verify data
+        uint64_t *t = (uint64_t *)gOut_Ref;
+        int32_t *t2 = (int32_t *)gOut_Ref2;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint64_t *q = (uint64_t *)(gOut[k]);
+                int32_t *q2 = (int32_t *)(gOut2[k]);
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] || t2[j] != q2[j] )
+                {
+                    double test = ((double*) q)[j];
+                    int correct2 = INT_MIN;
+                    long double correct = f->dfunc.f_fpI( s[j], &correct2 );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
+                    cl_long iErr = (long long) q2[j] - (long long) correct2;
+                    int fail = ! (fabsf(err) <= f->double_ulps && abs_cl_long( iErr ) <= maxiError );
+                    if( ftz )
+                    {
+                        // retry per section
+                        if( IsDoubleResultSubnormal(correct, f->double_ulps ) )
+                        {
+                            fail = fail && ! ( test == 0.0f && iErr == 0 );
+                            if( ! fail )
+                                err = 0.0f;
+                        }
+                        // retry per section
+                        if( IsDoubleSubnormal( s[j] ) )
+                        {
+                            int correct5, correct6;
+                            long double correct3 = f->dfunc.f_fpI( 0.0, &correct5 );
+                            long double correct4 = f->dfunc.f_fpI( -0.0, &correct6 );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                            cl_long iErr2 = (long long) q2[j] - (long long) correct5;
+                            cl_long iErr3 = (long long) q2[j] - (long long) correct6;
+                            // Did +0 work?
+                            if( fabsf(err2) <= f->double_ulps && abs_cl_long( iErr2 ) <= maxiError )
+                            {
+                                err = err2;
+                                iErr = iErr2;
+                                fail = 0;
+                            }
+                            // Did -0 work?
+                            else if(fabsf(err3) <= f->double_ulps && abs_cl_long( iErr3 ) <= maxiError)
+                            {
+                                err = err3;
+                                iErr = iErr3;
+                                fail = 0;
+                            }
+                            // retry per section
+                            if( fail && (IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps )) )
+                            {
+                                fail = fail && ! ( test == 0.0f && (abs_cl_long( iErr2 ) <= maxiError || abs_cl_long( iErr3 ) <= maxiError) );
+                                if( ! fail )
+                                {
+                                    err = 0.0f;
+                                    iErr = 0;
+                                }
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError )
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j];
+                    }
+                    if( llabs(iErr) > maxError2 )
+                    {
+                        maxError2 = llabs(iErr );
+                        maxErrorVal2 = s[j];
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\nERROR: %sD%s: {%f, %d} ulp error at %.13la: *{%.13la, %d} vs. {%.13la, %d}\n", f->name, sizeNames[k], err, (int) iErr, ((double*) gIn)[j], ((double*) gOut_Ref)[j], ((int*) gOut_Ref2)[j], test, q2[j] );
+                        error = -1;
+                        goto exit;
+                    }
+                }
+            }
+        }
+        if( 0 == (i & 0x0fffffff) )
+        {
+           if (gVerboseBruteForce)
+           {
+               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize);
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes )
+    {
+        //Init input array
+        double *p = (double *)gIn;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+            p[j] = DoubleFromUInt32(genrand_int32(d));
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error;
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILED -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime )
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sd%s", f->name, sizeNames[j] );
+        }
+        for( ; j < gMaxVectorSizeIndex; j++ )
+            vlog( "\t     -- " );
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal );
+    vlog( "\n" );
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+    return error;
diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
deleted file mode 100644
index 14d1fb9..0000000
--- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp
+++ /dev/null
@@ -1,418 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <climits>
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global int",
-                        sizeNames[vectorSize],
-                        "* out2, __global double",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i], out2 + i );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global int* out2, __global double* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 iout = INT_MIN;\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "       vstore3( iout, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       int3 iout = INT_MIN;\n"
-        "       double3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               out2[3*i+1] = iout.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               out2[3*i] = iout.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-static cl_ulong abs_cl_long(cl_long i)
-    cl_long mask = i >> 63;
-    return (i ^ mask) - mask;
-int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int64_t maxError2 = 0;
-    int ftz = f->ftz || gForceFTZ;
-    double maxErrorVal = 0.0f;
-    double maxErrorVal2 = 0.0f;
-    cl_ulong maxiError = f->double_ulps == INFINITY ? CL_ULONG_MAX : 0;
-    uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE);
-    int scale =
-        (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(cl_double)) + 1);
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    Force64BitFPUPrecision();
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        double *p = (double *)gIn;
-        if (gWimpyMode)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-                p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-                p[j] = DoubleFromUInt32((uint32_t)i + j);
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-        // Calculate the correctly rounded reference result
-        double *r = (double *)gOut_Ref;
-        int *r2 = (int *)gOut_Ref2;
-        double *s = (double *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-            r[j] = (double)f->dfunc.f_fpI(s[j], r2 + j);
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
-            }
-        }
-        if (gSkipCorrectnessTesting) break;
-        // Verify data
-        uint64_t *t = (uint64_t *)gOut_Ref;
-        int32_t *t2 = (int32_t *)gOut_Ref2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint64_t *q = (uint64_t *)(gOut[k]);
-                int32_t *q2 = (int32_t *)(gOut2[k]);
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j] || t2[j] != q2[j])
-                {
-                    double test = ((double *)q)[j];
-                    int correct2 = INT_MIN;
-                    long double correct = f->dfunc.f_fpI(s[j], &correct2);
-                    float err = Bruteforce_Ulp_Error_Double(test, correct);
-                    cl_long iErr = (long long)q2[j] - (long long)correct2;
-                    int fail = !(fabsf(err) <= f->double_ulps
-                                 && abs_cl_long(iErr) <= maxiError);
-                    if (ftz)
-                    {
-                        // retry per section
-                        if (IsDoubleResultSubnormal(correct, f->double_ulps))
-                        {
-                            fail = fail && !(test == 0.0f && iErr == 0);
-                            if (!fail) err = 0.0f;
-                        }
-                        // retry per section
-                        if (IsDoubleSubnormal(s[j]))
-                        {
-                            int correct5, correct6;
-                            long double correct3 =
-                                f->dfunc.f_fpI(0.0, &correct5);
-                            long double correct4 =
-                                f->dfunc.f_fpI(-0.0, &correct6);
-                            float err2 =
-                                Bruteforce_Ulp_Error_Double(test, correct3);
-                            float err3 =
-                                Bruteforce_Ulp_Error_Double(test, correct4);
-                            cl_long iErr2 =
-                                (long long)q2[j] - (long long)correct5;
-                            cl_long iErr3 =
-                                (long long)q2[j] - (long long)correct6;
-                            // Did +0 work?
-                            if (fabsf(err2) <= f->double_ulps
-                                && abs_cl_long(iErr2) <= maxiError)
-                            {
-                                err = err2;
-                                iErr = iErr2;
-                                fail = 0;
-                            }
-                            // Did -0 work?
-                            else if (fabsf(err3) <= f->double_ulps
-                                     && abs_cl_long(iErr3) <= maxiError)
-                            {
-                                err = err3;
-                                iErr = iErr3;
-                                fail = 0;
-                            }
-                            // retry per section
-                            if (fail
-                                && (IsDoubleResultSubnormal(correct2,
-                                                            f->double_ulps)
-                                    || IsDoubleResultSubnormal(correct3,
-                                                               f->double_ulps)))
-                            {
-                                fail = fail
-                                    && !(test == 0.0f
-                                         && (abs_cl_long(iErr2) <= maxiError
-                                             || abs_cl_long(iErr3)
-                                                 <= maxiError));
-                                if (!fail)
-                                {
-                                    err = 0.0f;
-                                    iErr = 0;
-                                }
-                            }
-                        }
-                    }
-                    if (fabsf(err) > maxError)
-                    {
-                        maxError = fabsf(err);
-                        maxErrorVal = s[j];
-                    }
-                    if (llabs(iErr) > maxError2)
-                    {
-                        maxError2 = llabs(iErr);
-                        maxErrorVal2 = s[j];
-                    }
-                    if (fail)
-                    {
-                        vlog_error("\nERROR: %sD%s: {%f, %d} ulp error at "
-                                   "%.13la: *{%.13la, %d} vs. {%.13la, %d}\n",
-                                   f->name, sizeNames[k], err, (int)iErr,
-                                   ((double *)gIn)[j], ((double *)gOut_Ref)[j],
-                                   ((int *)gOut_Ref2)[j], test, q2[j]);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
-             maxErrorVal2);
-    }
-    vlog("\n");
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
deleted file mode 100644
index 23b0d70..0000000
--- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp
+++ /dev/null
@@ -1,416 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <climits>
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global int",
-                        sizeNames[vectorSize],
-                        "* out2, __global float",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i], out2 + i );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global int* out2, __global float* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 iout = INT_MIN;\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "       vstore3( iout, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       int3 iout = INT_MIN;\n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               out2[3*i+1] = iout.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               out2[3*i] = iout.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-static cl_ulong abs_cl_long(cl_long i)
-    cl_long mask = i >> 63;
-    return (i ^ mask) - mask;
-int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int64_t maxError2 = 0;
-    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    float maxErrorVal = 0.0f;
-    float maxErrorVal2 = 0.0f;
-    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
-    int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(float)) + 1);
-    cl_ulong maxiError;
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    float float_ulps;
-    if (gIsEmbedded)
-        float_ulps = f->float_embedded_ulps;
-    else
-        float_ulps = f->float_ulps;
-    maxiError = float_ulps == INFINITY ? CL_ULONG_MAX : 0;
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        uint32_t *p = (uint32_t *)gIn;
-        if (gWimpyMode)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                p[j] = (uint32_t)i + j * scale;
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                p[j] = (uint32_t)i + j;
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-        // Calculate the correctly rounded reference result
-        float *r = (float *)gOut_Ref;
-        int *r2 = (int *)gOut_Ref2;
-        float *s = (float *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            r[j] = (float)f->func.f_fpI(s[j], r2 + j);
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
-            }
-        }
-        if (gSkipCorrectnessTesting) break;
-        // Verify data
-        uint32_t *t = (uint32_t *)gOut_Ref;
-        int32_t *t2 = (int32_t *)gOut_Ref2;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint32_t *q = (uint32_t *)(gOut[k]);
-                int32_t *q2 = (int32_t *)(gOut2[k]);
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j] || t2[j] != q2[j])
-                {
-                    float test = ((float *)q)[j];
-                    int correct2 = INT_MIN;
-                    double correct = f->func.f_fpI(s[j], &correct2);
-                    float err = Ulp_Error(test, correct);
-                    cl_long iErr = (int64_t)q2[j] - (int64_t)correct2;
-                    int fail = !(fabsf(err) <= float_ulps
-                                 && abs_cl_long(iErr) <= maxiError);
-                    if (ftz)
-                    {
-                        // retry per section
-                        if (IsFloatResultSubnormal(correct, float_ulps))
-                        {
-                            fail = fail && !(test == 0.0f && iErr == 0);
-                            if (!fail) err = 0.0f;
-                        }
-                        // retry per section
-                        if (IsFloatSubnormal(s[j]))
-                        {
-                            int correct5, correct6;
-                            double correct3 = f->func.f_fpI(0.0, &correct5);
-                            double correct4 = f->func.f_fpI(-0.0, &correct6);
-                            float err2 = Ulp_Error(test, correct3);
-                            float err3 = Ulp_Error(test, correct4);
-                            cl_long iErr2 =
-                                (long long)q2[j] - (long long)correct5;
-                            cl_long iErr3 =
-                                (long long)q2[j] - (long long)correct6;
-                            // Did +0 work?
-                            if (fabsf(err2) <= float_ulps
-                                && abs_cl_long(iErr2) <= maxiError)
-                            {
-                                err = err2;
-                                iErr = iErr2;
-                                fail = 0;
-                            }
-                            // Did -0 work?
-                            else if (fabsf(err3) <= float_ulps
-                                     && abs_cl_long(iErr3) <= maxiError)
-                            {
-                                err = err3;
-                                iErr = iErr3;
-                                fail = 0;
-                            }
-                            // retry per section
-                            if (fail
-                                && (IsFloatResultSubnormal(correct2, float_ulps)
-                                    || IsFloatResultSubnormal(correct3,
-                                                              float_ulps)))
-                            {
-                                fail = fail
-                                    && !(test == 0.0f
-                                         && (abs_cl_long(iErr2) <= maxiError
-                                             || abs_cl_long(iErr3)
-                                                 <= maxiError));
-                                if (!fail)
-                                {
-                                    err = 0.0f;
-                                    iErr = 0;
-                                }
-                            }
-                        }
-                    }
-                    if (fabsf(err) > maxError)
-                    {
-                        maxError = fabsf(err);
-                        maxErrorVal = s[j];
-                    }
-                    if (llabs(iErr) > maxError2)
-                    {
-                        maxError2 = llabs(iErr);
-                        maxErrorVal2 = s[j];
-                    }
-                    if (fail)
-                    {
-                        vlog_error("\nERROR: %s%s: {%f, %d} ulp error at %a: "
-                                   "*{%a, %d} vs. {%a, %d}\n",
-                                   f->name, sizeNames[k], err, (int)iErr,
-                                   ((float *)gIn)[j], ((float *)gOut_Ref)[j],
-                                   ((int *)gOut_Ref2)[j], test, q2[j]);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
-             maxErrorVal2);
-    }
-    vlog("\n");
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/unary_u.cpp b/test_conformance/math_brute_force/unary_u.cpp
new file mode 100644
index 0000000..690b6e7
--- /dev/null
+++ b/test_conformance/math_brute_force/unary_u.cpp
@@ -0,0 +1,692 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "Utility.h"
+#include <string.h>
+#include "FunctionList.h"
+int TestFunc_Float_UInt(const Func *f, MTdata);
+int TestFunc_Double_ULong(const Func *f, MTdata);
+extern const vtbl _unary_u = { "unary_u", TestFunc_Float_UInt,
+                               TestFunc_Double_ULong };
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p );
+static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+    const char *c[] = {
+                            "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global uint", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i] );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global uint* in)\n"
+                            "{\n"
+                            "   size_t i = get_global_id(0);\n"
+                            "   if( i + 1 < get_global_size(0) )\n"
+                            "   {\n"
+                            "       uint3 u0 = vload3( 0, in + 3 * i );\n"
+                            "       float3 f0 = ", name, "( u0 );\n"
+                            "       vstore3( f0, 0, out + 3*i );\n"
+                            "   }\n"
+                            "   else\n"
+                            "   {\n"
+                            "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                            "       uint3 u0;\n"
+                            "       float3 f0;\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 1:\n"
+                            "               u0 = (uint3)( in[3*i], 0xdead, 0xdead ); \n"
+                            "               break;\n"
+                            "           case 0:\n"
+                            "               u0 = (uint3)( in[3*i], in[3*i+1], 0xdead ); \n"
+                            "               break;\n"
+                            "       }\n"
+                            "       f0 = ", name, "( u0 );\n"
+                            "       switch( parity )\n"
+                            "       {\n"
+                            "           case 0:\n"
+                            "               out[3*i+1] = f0.y; \n"
+                            "               // fall through\n"
+                            "           case 1:\n"
+                            "               out[3*i] = f0.x; \n"
+                            "               break;\n"
+                            "       }\n"
+                            "   }\n"
+                            "}\n"
+                        };
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p);
+static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p )
+    const char *c[] = {
+                            "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global ulong", sizeNames[vectorSize], "* in)\n"
+                            "{\n"
+                            "   int i = get_global_id(0);\n"
+                            "   out[i] = ", name, "( in[i] );\n"
+                            "}\n"
+                        };
+    const char *c3[] = {    "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
+                        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global ulong* in)\n"
+                        "{\n"
+                        "   size_t i = get_global_id(0);\n"
+                        "   if( i + 1 < get_global_size(0) )\n"
+                        "   {\n"
+                        "       ulong3 u0 = vload3( 0, in + 3 * i );\n"
+                        "       double3 f0 = ", name, "( u0 );\n"
+                        "       vstore3( f0, 0, out + 3*i );\n"
+                        "   }\n"
+                        "   else\n"
+                        "   {\n"
+                        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
+                        "       ulong3 u0;\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 1:\n"
+                        "               u0 = (ulong3)( in[3*i], 0xdeaddeaddeaddeadUL, 0xdeaddeaddeaddeadUL ); \n"
+                        "               break;\n"
+                        "           case 0:\n"
+                        "               u0 = (ulong3)( in[3*i], in[3*i+1], 0xdeaddeaddeaddeadUL ); \n"
+                        "               break;\n"
+                        "       }\n"
+                        "       double3 f0 = ", name, "( u0 );\n"
+                        "       switch( parity )\n"
+                        "       {\n"
+                        "           case 0:\n"
+                        "               out[3*i+1] = f0.y; \n"
+                        "               // fall through\n"
+                        "           case 1:\n"
+                        "               out[3*i] = f0.x; \n"
+                        "               break;\n"
+                        "       }\n"
+                        "   }\n"
+                        "}\n"
+                    };
+    const char **kern = c;
+    size_t kernSize = sizeof(c)/sizeof(c[0]);
+    if( sizeValues[vectorSize] == 3 )
+    {
+        kern = c3;
+        kernSize = sizeof(c3)/sizeof(c3[0]);
+    }
+    char testName[32];
+    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
+    return MakeKernel(kern, (cl_uint) kernSize, testName, k, p);
+typedef struct BuildKernelInfo
+    cl_uint     offset;            // the first vector size to build
+    cl_kernel   *kernels;
+    cl_program  *programs;
+    const char  *nameInCode;
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); // forward declaration
+static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) // Thread-pool job: call BuildKernel() for vector-size index offset + job_id.
+    BuildKernelInfo *info = (BuildKernelInfo*) p; // p is the BuildKernelInfo that the caller handed to ThreadPool_Do
+    cl_uint i = info->offset + job_id; // vector-size index handled by this job
+    return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); // BuildKernel's result is returned unchanged
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); // forward declaration
+static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) // Thread-pool job: call BuildKernelDouble() for vector-size index offset + job_id.
+    BuildKernelInfo *info = (BuildKernelInfo*) p; // p is the BuildKernelInfo that the caller handed to ThreadPool_Do
+    cl_uint i = info->offset + job_id; // vector-size index handled by this job
+    return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); // BuildKernelDouble's result is returned unchanged
+int TestFunc_Float_UInt(const Func *f, MTdata d) // Runs the kernels for every enabled vector size over 32-bit unsigned inputs, compares the float results with f->func.f_u, and optionally times the kernels. Returns 0 or the first error code.
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); // flush-to-zero leniency: requested by the function, forced globally, or CL_FP_DENORM not set in gFloatCapabilities
+    float maxErrorVal = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( float ); // outer-loop stride: one buffer's worth of inputs (overridden in wimpy mode below)
+    int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1); // distance between consecutive inputs when filling the buffer in wimpy mode
+    int isRangeLimited = 0;
+    float float_ulps;
+    float half_sin_cos_tan_limit = 0;
+    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512); // wimpy mode replaces the outer-loop stride through the 2^32 input space
+    }
+    if( gIsEmbedded) // pick the ULP tolerance for the profile under test
+        float_ulps = f->float_embedded_ulps;
+    else
+        float_ulps = f->float_ulps;
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) // one job per vector-size index in [gMinVectorSizeIndex, gMaxVectorSizeIndex)
+        return error;
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) // NOTE(review): this builds into the same kernels[i]/programs[i] slots ThreadPool_Do just filled — confirm whether this serial loop is meant to be disabled
+        if( (error =  BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+    if( 0 == strcmp( f->name, "half_sin") || 0 == strcmp( f->name, "half_cos") )
+    {
+        isRangeLimited = 1;
+        half_sin_cos_tan_limit = 1.0f + float_ulps * (FLT_EPSILON/2.0f);             // out of range results from finite inputs must be in [-1,1]
+    }
+    else if( 0 == strcmp( f->name, "half_tan"))
+    {
+        isRangeLimited = 1;
+        half_sin_cos_tan_limit = INFINITY;             // out of range result from finite inputs must be numeric
+    }
+    for( i = 0; i < (1ULL<<32); i += step  ) // walk the 32-bit input space; i is the first input value of each chunk
+    {
+        //Init input array
+        uint32_t *p = (uint32_t *)gIn;
+        if( gWimpyMode )
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = (uint32_t) i + j * scale; // wimpy mode: every scale-th value starting at i
+        }
+        else
+        {
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = (uint32_t) i + j; // full mode: consecutive values starting at i
+        }
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL))) // CL_FALSE: non-blocking write
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error; // NOTE(review): returns directly while later failures use 'goto exit' — confirm the built kernels/programs need no release on this path
+        }
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float); // bytes per vector element of this kernel
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // vectors needed to cover the buffer, rounded up; despite the name it is passed as the global work size below
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ))){ LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)))
+            {
+                vlog_error( "FAILURE -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" ); // a flush failure is only logged; execution continues
+        //Calculate the correctly rounded reference result
+        float *r = (float*) gOut_Ref;
+        cl_uint *s = (cl_uint*) gIn;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+            r[j] = (float) f->func.f_u( s[j] ); // host reference for every input, computed before the blocking reads below
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL))) // CL_TRUE: blocking read
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+        if( gSkipCorrectnessTesting )
+            break; // leaves the input loop after the first chunk when correctness checking is disabled
+        //Verify data
+        uint32_t *t = (uint32_t*) gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( float ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint32_t *q = (uint32_t*)(gOut[k]);
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] ) // raw bit patterns are compared first; only mismatches reach the ULP check
+                {
+                    float test = ((float*) q)[j];
+                    double correct = f->func.f_u( s[j] );
+                    float err = Ulp_Error( test, correct );
+                    int fail = ! (fabsf(err) <= float_ulps); // !(a <= b) is also true when err is NaN
+                    // half_sin/cos/tan are only valid between +-2**16, Inf, NaN
+                    if( isRangeLimited && fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) && fabsf(s[j]) < INFINITY ) // s[j] is the unsigned integer input, converted to float for this range test
+                    {
+                        if( fabsf( test ) <= half_sin_cos_tan_limit ) // out-of-range input: any result within the limit is accepted
+                        {
+                            err = 0;
+                            fail = 0;
+                        }
+                    }
+                     if( fail )
+                    {
+                        if( ftz )
+                        {
+                            // retry per section
+                            if( IsFloatResultSubnormal(correct, float_ulps) )
+                            {
+                                fail = fail && ( test != 0.0f ); // a result of exactly zero is accepted in this case
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError ) // remember the largest error magnitude and the input that produced it
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j]; // stored as the integer input converted to float
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\n%s%s: %f ulp error at 0x%8.8x: *%a vs. %a\n", f->name, sizeNames[k], err, ((uint32_t*) gIn)[j], ((float*) gOut_Ref)[j], test );
+                      error = -1;
+                        goto exit;
+                    }
+                }
+            }
+        }
+        if( 0 == (i & 0x0fffffff) ) // progress output whenever the low 28 bits of i are zero
+        {
+           if (gVerboseBruteForce)
+           {
+               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize); // NOTE(review): i and step are uint64_t but are printed with %u / %zu — confirm against vlog's format handling
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes ) // optional timing pass over the same kernels
+    {
+        //Init input array
+        uint32_t *p = (uint32_t*)gIn;
+        if( strstr( f->name, "exp" ) || strstr( f->name, "sin" ) || strstr( f->name, "cos" ) || strstr( f->name, "tan" ) ) // timing inputs are chosen by substring match on the function name
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                ((float*)p)[j] = (float) genrand_real1(d); // stored as float values rather than integer bit patterns
+        else if( strstr( f->name, "log" ) )
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = genrand_int32(d) & 0x7fffffff; // top bit cleared
+        else
+            for( j = 0; j < bufferSize / sizeof( float ); j++ )
+                p[j] = genrand_int32(d);
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error; // NOTE(review): direct return, unlike the 'goto exit' paths below — confirm the kernels/programs need no release here
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // passed as the global work size, as in the correctness pass
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ ) // each iteration times one enqueue plus clFinish
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILURE -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime ) // keep the fastest run
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT; // report the mean instead of the minimum
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ %a", maxError, maxErrorVal ); // largest error seen and the input recorded for it
+    vlog( "\n" );
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+    return error;
+static cl_ulong random64( MTdata d ) // Combine two genrand_int32() draws into one 64-bit value.
+    return (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32); // NOTE: C leaves the evaluation order of the two operands of '|' unspecified, so which draw lands in the high half is compiler-dependent
+int TestFunc_Double_ULong(const Func *f, MTdata d) // Runs the kernels for every enabled vector size over random 64-bit unsigned inputs, compares the double results with f->dfunc.f_u, and optionally times the kernels. Returns 0 or the first error code.
+    uint64_t i;
+    uint32_t j, k;
+    int error;
+    cl_program programs[ VECTOR_SIZE_COUNT ];
+    cl_kernel kernels[ VECTOR_SIZE_COUNT ];
+    float maxError = 0.0f;
+    int ftz = f->ftz || gForceFTZ; // flush-to-zero leniency: requested by the function or forced globally
+    double maxErrorVal = 0.0f;
+    size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE;
+    uint64_t step = bufferSize / sizeof( cl_double ); // outer-loop stride (overridden in wimpy mode below)
+    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
+    if( gWimpyMode )
+    {
+        step = (1ULL<<32) * gWimpyReductionFactor / (512); // wimpy mode replaces the outer-loop stride
+    }
+    Force64BitFPUPrecision();
+    // Init the kernels
+    BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode };
+    if( (error = ThreadPool_Do( BuildKernel_DoubleFn,
+                                gMaxVectorSizeIndex - gMinVectorSizeIndex, // one job per vector-size index in [gMinVectorSizeIndex, gMaxVectorSizeIndex)
+                                &build_info ) ))
+    {
+        return error;
+    }
+    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) // NOTE(review): this builds into the same kernels[i]/programs[i] slots ThreadPool_Do just filled — confirm whether this serial loop is meant to be disabled
+        if( (error =  BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) )
+            return error;
+    for( i = 0; i < (1ULL<<32); i += step  ) // i only counts chunks here; the inputs themselves are random (see the fill below)
+    {
+        //Init input array
+        cl_ulong *p = (cl_ulong *)gIn;
+        for( j = 0; j < bufferSize / sizeof( cl_ulong ); j++ )
+            p[j] = random64(d);
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL))) // CL_FALSE: non-blocking write
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error; // NOTE(review): returns directly while later failures use 'goto exit' — confirm the built kernels/programs need no release on this path
+        }
+        // write garbage into output arrays
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            uint32_t pattern = 0xffffdead;
+            memset_pattern4(gOut[j], &pattern, bufferSize);
+            if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j );
+                goto exit;
+            }
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double); // bytes per vector element of this kernel
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // vectors needed to cover the buffer, rounded up; despite the name it is passed as the global work size below
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ))){ LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)))
+            {
+                vlog_error( "FAILURE -- could not execute kernel\n" );
+                goto exit;
+            }
+        }
+        // Get that moving
+        if( (error = clFlush(gQueue) ))
+            vlog( "clFlush failed\n" ); // a flush failure is only logged; execution continues
+        //Calculate the correctly rounded reference result
+        double *r = (double*) gOut_Ref;
+        cl_ulong *s = (cl_ulong*) gIn;
+        for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+            r[j] = (double) f->dfunc.f_u( s[j] ); // host reference for every input, computed before the blocking reads below
+        // Read the data back
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL))) // CL_TRUE: blocking read
+            {
+                vlog_error( "ReadArray failed %d\n", error );
+                goto exit;
+            }
+        }
+        if( gSkipCorrectnessTesting )
+            break; // leaves the input loop after the first chunk when correctness checking is disabled
+        //Verify data
+        uint64_t *t = (uint64_t*) gOut_Ref;
+        for( j = 0; j < bufferSize / sizeof( cl_double ); j++ )
+        {
+            for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+            {
+                uint64_t *q = (uint64_t*)(gOut[k]);
+                // If we aren't getting the correctly rounded result
+                if( t[j] != q[j] ) // raw bit patterns are compared first; only mismatches reach the ULP check
+                {
+                    double test = ((double*) q)[j];
+                    long double correct = f->dfunc.f_u( s[j] );
+                    float err = Bruteforce_Ulp_Error_Double(test, correct);
+                    int fail = ! (fabsf(err) <= f->double_ulps); // !(a <= b) is also true when err is NaN
+                    // NOTE(review): a half_sin/cos/tan range remark stood here, but no range check follows in this function; only the flush-to-zero retry below applies
+                    if( fail )
+                    {
+                        if( ftz )
+                        {
+                            // retry per section
+                            if( IsDoubleResultSubnormal(correct, f->double_ulps) )
+                            {
+                                fail = fail && ( test != 0.0 ); // a result of exactly zero is accepted in this case
+                                if( ! fail )
+                                    err = 0.0f;
+                            }
+                        }
+                    }
+                    if( fabsf(err ) > maxError ) // remember the largest error magnitude and the input that produced it
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = s[j]; // stored as the integer input converted to double
+                    }
+                    if( fail )
+                    {
+                        vlog_error( "\n%s%sD: %f ulp error at 0x%16.16llx: *%.13la vs. %.13la\n", f->name, sizeNames[k], err, ((uint64_t*) gIn)[j], ((double*) gOut_Ref)[j], test );
+                      error = -1;
+                        goto exit;
+                    }
+                }
+            }
+        }
+        if( 0 == (i & 0x0fffffff) ) // progress output whenever the low 28 bits of i are zero
+        {
+           if (gVerboseBruteForce)
+           {
+               vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, bufferSize); // NOTE(review): i and step are uint64_t but are printed with %u / %zu — confirm against vlog's format handling
+           } else
+           {
+              vlog("." );
+           }
+           fflush(stdout);
+        }
+    }
+    if( ! gSkipCorrectnessTesting )
+    {
+        if( gWimpyMode )
+            vlog( "Wimp pass" );
+        else
+            vlog( "passed" );
+    }
+    if( gMeasureTimes ) // optional timing pass over the same kernels
+    {
+        //Init input array
+        double *p = (double*) gIn;
+        for( j = 0; j < bufferSize / sizeof( double ); j++ )
+            p[j] = random64(d); // NOTE(review): stores the random integer converted to double, whereas the correctness pass stores raw cl_ulong bit patterns — confirm this is intended
+        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) ))
+        {
+            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
+            return error; // NOTE(review): direct return, unlike the 'goto exit' paths below — confirm the kernels/programs need no release here
+        }
+        // Run the kernels
+        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // passed as the global work size, as in the correctness pass
+            if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; }
+            if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; }
+            double sum = 0.0;
+            double bestTime = INFINITY;
+            for( k = 0; k < PERF_LOOP_COUNT; k++ ) // each iteration times one enqueue plus clFinish
+            {
+                uint64_t startTime = GetTime();
+                if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
+                {
+                    vlog_error( "FAILURE -- could not execute kernel\n" );
+                    goto exit;
+                }
+                // Make sure OpenCL is done
+                if( (error = clFinish(gQueue) ) )
+                {
+                    vlog_error( "Error %d at clFinish\n", error );
+                    goto exit;
+                }
+                uint64_t endTime = GetTime();
+                double time = SubtractTime( endTime, startTime );
+                sum += time;
+                if( time < bestTime ) // keep the fastest run
+                    bestTime = time;
+            }
+            if( gReportAverageTimes )
+                bestTime = sum / PERF_LOOP_COUNT; // report the mean instead of the minimum
+            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) );
+            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] );
+        }
+        for( ; j < gMaxVectorSizeIndex; j++ ) // j already failed the j < gMaxVectorSizeIndex test when the loop above finished, so this body does not run
+            vlog( "\t     -- " );
+    }
+    if( ! gSkipCorrectnessTesting )
+        vlog( "\t%8.2f @ %a", maxError, maxErrorVal ); // largest error seen and the input recorded for it
+    vlog( "\n" );
+    // Release
+    for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
+    {
+        clReleaseKernel(kernels[k]);
+        clReleaseProgram(programs[k]);
+    }
+    return error;
diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp
deleted file mode 100644
index 3c5f99d..0000000
--- a/test_conformance/math_brute_force/unary_u_double.cpp
+++ /dev/null
@@ -1,314 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode) // Assembles the source of kernel "math_kernel<size>", which applies `name` to ulong inputs and stores double results, then hands it to MakeKernel.
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", // fragments for the generic case: out[i] = name( in[i] )
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global ulong",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-    const char *c3[] = { // fragments for the 3-element case: vload3/vstore3 on scalar pointers plus a separate path for the last work-item
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global ulong* in                 )\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       ulong3 u0 = vload3( 0, in + 3 * i );\n"
-        "       double3 f0 = ",
-        name,
-        "( u0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       ulong3 u0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               u0 = (ulong3)( in[3*i], 0xdeaddeaddeaddeadUL, "
-        "0xdeaddeaddeaddeadUL ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               u0 = (ulong3)( in[3*i], in[3*i+1], "
-        "0xdeaddeaddeaddeadUL ); \n"
-        "               break;\n"
-        "       }\n"
-        "       double3 f0 = ",
-        name,
-        "( u0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c; // default to the generic fragments
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3) // 3-element vectors use the c3 fragments instead
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s", // same kernel name as spelled in the source fragments above
-             sizeNames[vectorSize]);
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); // MakeKernel's result is returned unchanged
-typedef struct BuildKernelInfo // Argument bundle passed (as void*) to BuildKernelFn.
-    cl_uint offset; // the first vector size to build; each job adds its job_id to this index
-    cl_kernel *kernels; // kernel array; BuildKernelFn passes kernels + index to BuildKernel
-    cl_program *programs; // program array; BuildKernelFn passes programs + index to BuildKernel
-    const char *nameInCode; // function name forwarded to BuildKernel
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread-pool job: call BuildKernel() for vector-size index offset + job_id.
-    BuildKernelInfo *info = (BuildKernelInfo *)p; // p is the BuildKernelInfo that the caller handed to ThreadPool_Do
-    cl_uint i = info->offset + job_id; // vector-size index handled by this job
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode); // BuildKernel's result is returned unchanged
-static cl_ulong random64(MTdata d) // Combine two genrand_int32() draws into one 64-bit value.
-    return (cl_ulong)genrand_int32(d) | ((cl_ulong)genrand_int32(d) << 32); // NOTE: the evaluation order of the two operands of '|' is unspecified, so which draw lands in the high half is compiler-dependent
-int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) // Runs the kernels for every enabled vector size over random 64-bit unsigned inputs and compares the double results with f->dfunc.f_u. Returns 0 or the first error code.
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int ftz = f->ftz || gForceFTZ; // flush-to-zero leniency: requested by the function or forced globally
-    double maxErrorVal = 0.0f;
-    uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE); // outer-loop stride
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-    Force64BitFPUPrecision();
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex, // one job per vector-size index in [gMinVectorSizeIndex, gMaxVectorSizeIndex)
-                                   &build_info)))
-            return error;
-    }
-    for (uint64_t i = 0; i < (1ULL << 32); i += step) // i only counts chunks here; the inputs themselves are random (see the fill below)
-    {
-        // Init input array
-        cl_ulong *p = (cl_ulong *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_ulong); j++)
-            p[j] = random64(d);
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, // CL_FALSE: non-blocking write
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error; // NOTE(review): returns directly while later failures use 'goto exit' — confirm the built kernels/programs need no release on this path
-        }
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_double); // bytes per vector element of this kernel
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // vectors needed to cover the buffer, rounded up; despite the name it is passed as the global work size below
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); // a flush failure is only logged; execution continues
-        // Calculate the correctly rounded reference result
-        double *r = (double *)gOut_Ref;
-        cl_ulong *s = (cl_ulong *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-            r[j] = (double)f->dfunc.f_u(s[j]); // host reference for every input, computed before the blocking reads below
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, // CL_TRUE: blocking read
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-        if (gSkipCorrectnessTesting) break; // leaves the input loop after the first chunk when correctness checking is disabled
-        // Verify data
-        uint64_t *t = (uint64_t *)gOut_Ref;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint64_t *q = (uint64_t *)(gOut[k]);
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j]) // raw bit patterns are compared first; only mismatches reach the ULP check
-                {
-                    double test = ((double *)q)[j];
-                    long double correct = f->dfunc.f_u(s[j]);
-                    float err = Bruteforce_Ulp_Error_Double(test, correct);
-                    int fail = !(fabsf(err) <= f->double_ulps); // !(a <= b) is also true when err is NaN
-                    if (fail)
-                    {
-                        if (ftz)
-                        {
-                            // retry per section
-                            if (IsDoubleResultSubnormal(correct,
-                                                        f->double_ulps))
-                            {
-                                fail = fail && (test != 0.0); // a result of exactly zero is accepted in this case
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                    if (fabsf(err) > maxError) // remember the largest error magnitude and the input that produced it
-                    {
-                        maxError = fabsf(err);
-                        maxErrorVal = s[j]; // stored as the integer input converted to double
-                    }
-                    if (fail)
-                    {
-                        vlog_error("\n%s%sD: %f ulp error at 0x%16.16llx: "
-                                   "*%.13la vs. %.13la\n",
-                                   f->name, sizeNames[k], err,
-                                   ((uint64_t *)gIn)[j],
-                                   ((double *)gOut_Ref)[j], test);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-        if (0 == (i & 0x0fffffff)) // progress output whenever the low 28 bits of i are zero
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step, // NOTE(review): i and step are uint64_t but are printed with %u / %zu — confirm against vlog's format handling
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t%8.2f @ %a", maxError, maxErrorVal); // largest error seen and the input recorded for it
-    }
-    vlog("\n");
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp
deleted file mode 100644
index 44c5af4..0000000
--- a/test_conformance/math_brute_force/unary_u_float.cpp
+++ /dev/null
@@ -1,316 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "function_list.h"
-#include "test_functions.h"
-#include "utility.h"
-#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
-                       cl_program *p, bool relaxedMode)
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global uint",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global uint* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       uint3 u0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f0 = ",
-        name,
-        "( u0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       uint3 u0;\n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               u0 = (uint3)( in[3*i], 0xdead, 0xdead ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               u0 = (uint3)( in[3*i], in[3*i+1], 0xdead ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( u0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-typedef struct BuildKernelInfo
-    cl_uint offset; // the first vector size to build
-    cl_kernel *kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernels + i,
-                       info->programs + i, info->relaxedMode);
-int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
-    int error;
-    cl_program programs[VECTOR_SIZE_COUNT];
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
-    float maxError = 0.0f;
-    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
-    float maxErrorVal = 0.0f;
-    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
-    int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(double)) + 1);
-    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-    float float_ulps;
-    if (gIsEmbedded)
-        float_ulps = f->float_embedded_ulps;
-    else
-        float_ulps = f->float_ulps;
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
-                                       f->nameInCode, relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
-    for (uint64_t i = 0; i < (1ULL << 32); i += step)
-    {
-        // Init input array
-        uint32_t *p = (uint32_t *)gIn;
-        if (gWimpyMode)
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                p[j] = (uint32_t)i + j * scale;
-        }
-        else
-        {
-            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-                p[j] = (uint32_t)i + j;
-        }
-        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
-                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
-        {
-            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
-            return error;
-        }
-        // write garbage into output arrays
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
-        }
-        // Run the kernels
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
-            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                goto exit;
-            }
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
-            {
-                vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
-            }
-        }
-        // Get that moving
-        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
-        // Calculate the correctly rounded reference result
-        float *r = (float *)gOut_Ref;
-        cl_uint *s = (cl_uint *)gIn;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-            r[j] = (float)f->func.f_u(s[j]);
-        // Read the data back
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            if ((error =
-                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
-                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
-            {
-                vlog_error("ReadArray failed %d\n", error);
-                goto exit;
-            }
-        }
-        if (gSkipCorrectnessTesting) break;
-        // Verify data
-        uint32_t *t = (uint32_t *)gOut_Ref;
-        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
-        {
-            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-            {
-                uint32_t *q = (uint32_t *)(gOut[k]);
-                // If we aren't getting the correctly rounded result
-                if (t[j] != q[j])
-                {
-                    float test = ((float *)q)[j];
-                    double correct = f->func.f_u(s[j]);
-                    float err = Ulp_Error(test, correct);
-                    int fail = !(fabsf(err) <= float_ulps);
-                    if (fail)
-                    {
-                        if (ftz)
-                        {
-                            // retry per section
-                            if (IsFloatResultSubnormal(correct, float_ulps))
-                            {
-                                fail = fail && (test != 0.0f);
-                                if (!fail) err = 0.0f;
-                            }
-                        }
-                    }
-                    if (fabsf(err) > maxError)
-                    {
-                        maxError = fabsf(err);
-                        maxErrorVal = s[j];
-                    }
-                    if (fail)
-                    {
-                        vlog_error(
-                            "\n%s%s: %f ulp error at 0x%8.8x: *%a vs. %a\n",
-                            f->name, sizeNames[k], err, ((uint32_t *)gIn)[j],
-                            ((float *)gOut_Ref)[j], test);
-                        error = -1;
-                        goto exit;
-                    }
-                }
-            }
-        }
-        if (0 == (i & 0x0fffffff))
-        {
-            if (gVerboseBruteForce)
-            {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
-            }
-            else
-            {
-                vlog(".");
-            }
-            fflush(stdout);
-        }
-    }
-    if (!gSkipCorrectnessTesting)
-    {
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-        vlog("\t%8.2f @ %a", maxError, maxErrorVal);
-    }
-    vlog("\n");
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-        clReleaseProgram(programs[k]);
-    }
-    return error;
diff --git a/test_conformance/math_brute_force/utility.cpp b/test_conformance/math_brute_force/utility.cpp
deleted file mode 100644
index 9b0191a..0000000
--- a/test_conformance/math_brute_force/utility.cpp
+++ /dev/null
@@ -1,192 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "utility.h"
-#include "function_list.h"
-#if defined(__PPC__)
-// Global varaiable used to hold the FPU control register state. The FPSCR
-// register can not be used because not all Power implementations retain or
-// observed the NI (non-IEEE mode) bit.
-__thread fpu_control_t fpu_control = 0;
-void MulD(double *rhi, double *rlo, double u, double v)
-    const double c = 134217729.0; // 1+2^27
-    double up, u1, u2, vp, v1, v2;
-    up = u * c;
-    u1 = (u - up) + up;
-    u2 = u - u1;
-    vp = v * c;
-    v1 = (v - vp) + vp;
-    v2 = v - v1;
-    double rh = u * v;
-    double rl = (((u1 * v1 - rh) + (u1 * v2)) + (u2 * v1)) + (u2 * v2);
-    *rhi = rh;
-    *rlo = rl;
-void AddD(double *rhi, double *rlo, double a, double b)
-    double zhi, zlo;
-    zhi = a + b;
-    if (fabs(a) > fabs(b))
-    {
-        zlo = zhi - a;
-        zlo = b - zlo;
-    }
-    else
-    {
-        zlo = zhi - b;
-        zlo = a - zlo;
-    }
-    *rhi = zhi;
-    *rlo = zlo;
-void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
-    double mh, ml;
-    double c = 134217729.0;
-    double up, u1, u2, vp, v1, v2;
-    up = xh * c;
-    u1 = (xh - up) + up;
-    u2 = xh - u1;
-    vp = yh * c;
-    v1 = (yh - vp) + vp;
-    v2 = yh - v1;
-    mh = xh * yh;
-    ml = (((u1 * v1 - mh) + (u1 * v2)) + (u2 * v1)) + (u2 * v2);
-    ml += xh * yl + xl * yh;
-    *rhi = mh + ml;
-    *rlo = (mh - (*rhi)) + ml;
-void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
-    double r, s;
-    r = xh + yh;
-    s = (fabs(xh) > fabs(yh)) ? (xh - r + yh + yl + xl)
-                              : (yh - r + xh + xl + yl);
-    *rhi = r + s;
-    *rlo = (r - (*rhi)) + s;
-void DivideDD(double *chi, double *clo, double a, double b)
-    *chi = a / b;
-    double rhi, rlo;
-    MulD(&rhi, &rlo, *chi, b);
-    AddDD(&rhi, &rlo, -rhi, -rlo, a, 0.0);
-    *clo = rhi / b;
-// These functions comapre two floats/doubles. Since some platforms may choose
-// to flush denormals to zeros before comparison, comparison like a < b may give
-// wrong result in "certain cases" where we do need correct compasion result
-// when operands are denormals .... these functions comapre floats/doubles using
-// signed integer/long int rep. In other cases, when flushing to zeros is fine,
-// these should not be used. Also these doesn't check for nans and assume nans
-// are handled separately as special edge case by the caller which calls these
-// functions return 0 if both are equal, 1 if x > y and -1 if x < y.
-inline int compareFloats(float x, float y)
-    int32f_t a, b;
-    a.f = x;
-    b.f = y;
-    if (a.i & 0x80000000) a.i = 0x80000000 - a.i;
-    if (b.i & 0x80000000) b.i = 0x80000000 - b.i;
-    if (a.i == b.i) return 0;
-    return a.i < b.i ? -1 : 1;
-inline int compareDoubles(double x, double y)
-    int64d_t a, b;
-    a.d = x;
-    b.d = y;
-    if (a.l & 0x8000000000000000LL) a.l = 0x8000000000000000LL - a.l;
-    if (b.l & 0x8000000000000000LL) b.l = 0x8000000000000000LL - b.l;
-    if (a.l == b.l) return 0;
-    return a.l < b.l ? -1 : 1;
-void logFunctionInfo(const char *fname, unsigned int float_size,
-                     unsigned int isFastRelaxed)
-    char const *fpSizeStr = NULL;
-    char const *fpFastRelaxedStr = "";
-    switch (float_size)
-    {
-        case sizeof(cl_double): fpSizeStr = "fp64"; break;
-        case sizeof(cl_float): fpSizeStr = "fp32"; break;
-        case sizeof(cl_half): fpSizeStr = "fp16"; break;
-    }
-    if (isFastRelaxed)
-    {
-        fpFastRelaxedStr = "rlx";
-    }
-    vlog("%15s %4s %4s", fname, fpSizeStr, fpFastRelaxedStr);
-float getAllowedUlpError(const Func *f, const bool relaxed)
-    float ulp;
-    if (relaxed)
-    {
-        if (gIsEmbedded)
-        {
-            ulp = f->relaxed_embedded_error;
-        }
-        else
-        {
-            ulp = f->relaxed_error;
-        }
-    }
-    else
-    {
-        if (gIsEmbedded)
-        {
-            ulp = f->float_embedded_ulps;
-        }
-        else
-        {
-            ulp = f->float_ulps;
-        }
-    }
-    return ulp;
diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h
deleted file mode 100644
index ac4db9c..0000000
--- a/test_conformance/math_brute_force/utility.h
+++ /dev/null
@@ -1,264 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef UTILITY_H
-#define UTILITY_H
-#include "harness/compat.h"
-#include "harness/rounding_mode.h"
-#include "harness/fpcontrol.h"
-#include "harness/testHarness.h"
-#include "harness/ThreadPool.h"
-#include "harness/conversions.h"
-#define BUFFER_SIZE (1024 * 1024 * 2)
-#if defined(__GNUC__)
-#define UNUSED __attribute__((unused))
-#define UNUSED
-struct Func;
-extern int gWimpyReductionFactor;
-extern const char *sizeNames[VECTOR_SIZE_COUNT];
-extern const int sizeValues[VECTOR_SIZE_COUNT];
-extern cl_device_id gDevice;
-extern cl_context gContext;
-extern cl_command_queue gQueue;
-extern void *gIn;
-extern void *gIn2;
-extern void *gIn3;
-extern void *gOut_Ref;
-extern void *gOut_Ref2;
-extern void *gOut[VECTOR_SIZE_COUNT];
-extern void *gOut2[VECTOR_SIZE_COUNT];
-extern cl_mem gInBuffer;
-extern cl_mem gInBuffer2;
-extern cl_mem gInBuffer3;
-extern cl_mem gOutBuffer[VECTOR_SIZE_COUNT];
-extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT];
-extern int gSkipCorrectnessTesting;
-extern int gForceFTZ;
-extern int gFastRelaxedDerived;
-extern int gWimpyMode;
-extern int gIsInRTZMode;
-extern int gInfNanSupport;
-extern int gIsEmbedded;
-extern int gVerboseBruteForce;
-extern uint32_t gMaxVectorSizeIndex;
-extern uint32_t gMinVectorSizeIndex;
-extern cl_device_fp_config gFloatCapabilities;
-#define LOWER_IS_BETTER 0
-#include "harness/errorHelpers.h"
-#if defined(_MSC_VER)
-// Deal with missing scalbn on windows
-#define scalbnf(_a, _i) ldexpf(_a, _i)
-#define scalbn(_a, _i) ldexp(_a, _i)
-#define scalbnl(_a, _i) ldexpl(_a, _i)
-float Abs_Error(float test, double reference);
-float Ulp_Error(float test, double reference);
-float Bruteforce_Ulp_Error_Double(double test, long double reference);
-int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
-               cl_program *p, bool relaxedMode);
-int MakeKernels(const char **c, cl_uint count, const char *name,
-                cl_uint kernel_count, cl_kernel *k, cl_program *p,
-                bool relaxedMode);
-// used to convert a bucket of bits into a search pattern through double
-static inline double DoubleFromUInt32(uint32_t bits);
-static inline double DoubleFromUInt32(uint32_t bits)
-    union {
-        uint64_t u;
-        double d;
-    } u;
-    // split 0x89abcdef to 0x89abc00000000def
-    u.u = bits & 0xfffU;
-    u.u |= (uint64_t)(bits & ~0xfffU) << 32;
-    // sign extend the leading bit of def segment as sign bit so that the middle
-    // region consists of either all 1s or 0s
-    u.u -= (bits & 0x800U) << 1;
-    // return result
-    return u.d;
-void _LogBuildError(cl_program p, int line, const char *file);
-#define LogBuildError(program) _LogBuildError(program, __LINE__, __FILE__)
-// The spec is fairly clear that we may enforce a hard cutoff to prevent
-// premature flushing to zero.
-// However, to avoid conflict for 1.0, we are letting results at TYPE_MIN +
-// ulp_limit to be flushed to zero.
-static inline int IsFloatResultSubnormal(double x, float ulps)
-    x = fabs(x) - MAKE_HEX_DOUBLE(0x1.0p-149, 0x1, -149) * (double)ulps;
-    return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
-static inline int IsFloatResultSubnormalAbsError(double x, float abs_err)
-    x = x - abs_err;
-    return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
-static inline int IsDoubleResultSubnormal(long double x, float ulps)
-    x = fabsl(x) - MAKE_HEX_LONG(0x1.0p-1074, 0x1, -1074) * (long double)ulps;
-    return x < MAKE_HEX_LONG(0x1.0p-1022, 0x1, -1022);
-static inline int IsFloatInfinity(double x)
-    union {
-        cl_float d;
-        cl_uint u;
-    } u;
-    u.d = (cl_float)x;
-    return ((u.u & 0x7fffffffU) == 0x7F800000U);
-static inline int IsFloatMaxFloat(double x)
-    union {
-        cl_float d;
-        cl_uint u;
-    } u;
-    u.d = (cl_float)x;
-    return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU);
-static inline int IsFloatNaN(double x)
-    union {
-        cl_float d;
-        cl_uint u;
-    } u;
-    u.d = (cl_float)x;
-    return ((u.u & 0x7fffffffU) > 0x7F800000U);
-extern cl_uint RoundUpToNextPowerOfTwo(cl_uint x);
-// Windows (since long double got deprecated) sets the x87 to 53-bit precision
-// (that's x87 default state).  This causes problems with the tests that
-// convert long and ulong to float and double or otherwise deal with values
-// that need more precision than 53-bit. So, set the x87 to 64-bit precision.
-static inline void Force64BitFPUPrecision(void)
-#if __MINGW32__
-    // The usual method is to use _controlfp as follows:
-    //     #include <float.h>
-    //     _controlfp(_PC_64, _MCW_PC);
-    //
-    // _controlfp is available on MinGW32 but not on MinGW64. Instead of having
-    // divergent code just use inline assembly which works for both.
-    unsigned short int orig_cw = 0;
-    unsigned short int new_cw = 0;
-    __asm__ __volatile__("fstcw %0" : "=m"(orig_cw));
-    new_cw = orig_cw | 0x0300; // set precision to 64-bit
-    __asm__ __volatile__("fldcw  %0" ::"m"(new_cw));
-#elif defined(_WIN32) && defined(__INTEL_COMPILER)
-    // Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not*
-    // work on win.x64: > On the x64 architecture, changing the floating point
-    // precision is not supported. (Taken from
-    //
-    int cw;
-    __asm { fnstcw cw }
-    ; // Get current value of FPU control word.
-    cw = cw & 0xfffffcff
-        | (3 << 8); // Set Precision Control to Double Extended Precision.
-    __asm { fldcw cw }
-    ; // Set new value of FPU control word.
-    /* Implement for other platforms if needed */
-extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes);
-typedef union {
-    int32_t i;
-    float f;
-} int32f_t;
-typedef union {
-    int64_t l;
-    double d;
-} int64d_t;
-void MulD(double *rhi, double *rlo, double u, double v);
-void AddD(double *rhi, double *rlo, double a, double b);
-void MulDD(double *rhi, double *rlo, double xh, double xl, double yh,
-           double yl);
-void AddDD(double *rhi, double *rlo, double xh, double xl, double yh,
-           double yl);
-void DivideDD(double *chi, double *clo, double a, double b);
-int compareFloats(float x, float y);
-int compareDoubles(double x, double y);
-void logFunctionInfo(const char *fname, unsigned int float_size,
-                     unsigned int isFastRelaxed);
-float getAllowedUlpError(const Func *f, const bool relaxed);
-static inline cl_uint getTestScale(size_t typeSize)
-    if (gWimpyMode)
-    {
-        return (cl_uint)typeSize * 2 * gWimpyReductionFactor;
-    }
-    else if (gIsEmbedded)
-    {
-    }
-    else
-    {
-        return 1;
-    }
-static inline uint64_t getTestStep(size_t typeSize, size_t bufferSize)
-    if (gWimpyMode)
-    {
-        return (1ULL << 32) * gWimpyReductionFactor / (512);
-    }
-    else if (gIsEmbedded)
-    {
-        return (BUFFER_SIZE / typeSize) * EMBEDDED_REDUCTION_FACTOR;
-    }
-    else
-    {
-        return bufferSize / typeSize;
-    }
-#endif /* UTILITY_H */
diff --git a/test_conformance/mem_host_flags/C_host_memory_block.h b/test_conformance/mem_host_flags/C_host_memory_block.h
index 1d3b475..91b47ab 100644
--- a/test_conformance/mem_host_flags/C_host_memory_block.h
+++ b/test_conformance/mem_host_flags/C_host_memory_block.h
@@ -69,28 +69,31 @@
 template < class T>
-    if (pData != NULL) delete[] pData;
-    num_elements = 0;
+  if (pData!=NULL) delete pData;
+  num_elements = 0;
 template < class T >
 void C_host_memory_block<T>::Init(int num_elem, T & value)
-    if (pData != NULL) delete[] pData;
-    pData = new T[num_elem];
-    for (int i = 0; i < num_elem; i++) pData[i] = value;
+  if (pData!=NULL) delete pData;
+  pData= new T [num_elem];
+  for (int i=0; i<num_elem; i++)
+    pData[i] = value;
-    num_elements = num_elem;
+  num_elements= num_elem;
 template < class T >
 void C_host_memory_block<T>::Init(int num_elem)
-    if (pData != NULL) delete[] pData;
-    pData = new T[num_elem];
-    for (int i = 0; i < num_elem; i++) pData[i] = (T)i;
+  if (pData!=NULL) delete pData;
+  pData = new T [num_elem];
+  for (int i=0; i<num_elem; i++)
+    pData[i]= (T) i;
-    num_elements = num_elem;
+  num_elements = num_elem;
 template < class T >
 void  C_host_memory_block<T>::Set_to_zero()
diff --git a/test_conformance/mem_host_flags/main.cpp b/test_conformance/mem_host_flags/main.cpp
index f064980..01bad67 100644
--- a/test_conformance/mem_host_flags/main.cpp
+++ b/test_conformance/mem_host_flags/main.cpp
@@ -47,5 +47,5 @@
     log_info("1st part, non gl-sharing objects...\n");
     gTestRounding = true;
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
diff --git a/test_conformance/multiple_device_context/main.cpp b/test_conformance/multiple_device_context/main.cpp
index 6e16c24..1027666 100644
--- a/test_conformance/multiple_device_context/main.cpp
+++ b/test_conformance/multiple_device_context/main.cpp
@@ -41,6 +41,6 @@
 int main(int argc, const char *argv[])
-    return runTestHarness(argc, argv, test_num, test_list, true, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, true, 0 );
diff --git a/test_conformance/multiple_device_context/test_multiple_devices.cpp b/test_conformance/multiple_device_context/test_multiple_devices.cpp
index 59543ad..b6f15f6 100644
--- a/test_conformance/multiple_device_context/test_multiple_devices.cpp
+++ b/test_conformance/multiple_device_context/test_multiple_devices.cpp
@@ -91,7 +91,7 @@
   for( i = 0; i < TEST_SIZE; i++ )
     data[i] = genrand_int32(seed);
-  stream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+  stream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
                           sizeof(cl_uint) * TEST_SIZE, data, &error);
   test_error(error, "Unable to create test array");
diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
index a4a6a74..de041c2 100644
--- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
+++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
@@ -243,134 +243,75 @@
   NL "}"
   NL ;
-    const cl_device_id &device, const cl_context &context,
-    const cl_command_queue &queue, const cl_uint dims, size_t *globalSize,
-    const size_t *localSize, const size_t *buffersSize,
-    const size_t *globalWorkOffset, const size_t *reqdWorkGroupSize)
-    : _device(device), _context(context), _queue(queue), _dims(dims)
+TestNonUniformWorkGroup::TestNonUniformWorkGroup (const cl_device_id &device, const cl_context &context,
+  const cl_command_queue &queue, const cl_uint dims, const size_t *globalSize, const size_t *localSize, const size_t *buffersSize,
+  const size_t *globalWorkOffset, const size_t *reqdWorkGroupSize)
+  : _device(device), _context(context), _queue(queue), _dims (dims) {
-    if (globalSize == NULL || dims < 1 || dims > 3)
-    {
-        // throw std::invalid_argument("globalSize is NULL value.");
-        // This is method of informing that parameters are wrong.
-        // It would be checked by prepareDevice() function.
-        // This is used because of lack of exception support.
-        _globalSize[0] = 0;
-        return;
+  if (globalSize == NULL || dims < 1 || dims > 3) {
+    //throw std::invalid_argument("globalSize is NULL value.");
+    // This is method of informing that parameters are wrong.
+    // It would be checked by prepareDevice() function.
+    // This is used because of lack of exception support.
+    _globalSize[0] = 0;
+    return;
+  }
+  cl_uint i;
+  _globalWorkOffset_IsNull = true;
+  _localSize_IsNull = true;
+  setGlobalWorkgroupSize(globalSize);
+  setLocalWorkgroupSize(globalSize,localSize);
+  for (i = _dims; i < MAX_DIMS; i++) {
+    _globalSize[i] = 1;
+  }
+  for (i = 0; i < MAX_DIMS; i++) {
+    _globalWorkOffset[i] = 0;
+  }
+  if (globalWorkOffset) {
+    _globalWorkOffset_IsNull = false;
+    for (i = 0; i < _dims; i++) {
+      _globalWorkOffset[i] = globalWorkOffset[i];
+  }
-    // For OpenCL-3.0 support for non-uniform workgroups is optional, it's still
-    // useful to run these tests since we can verify the behavior of the
-    // get_enqueued_local_size() builtin for uniform workgroups, so we round up
-    // the global size to insure uniform workgroups on those 3.0 devices.
-    // We only need to do this when localSize is non-null, otherwise the driver
-    // will select a value for localSize which will be uniform on devices that
-    // don't support non-uniform work-groups.
-    if (nullptr != localSize && get_device_cl_version(device) >= Version(3, 0))
-    {
-        // Query for the non-uniform work-group support.
-        cl_bool are_non_uniform_sub_groups_supported{ CL_FALSE };
-        auto error =
-            clGetDeviceInfo(device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT,
-                            sizeof(are_non_uniform_sub_groups_supported),
-                            &are_non_uniform_sub_groups_supported, nullptr);
-        if (error)
-        {
-            print_error(error,
-                        "clGetDeviceInfo failed for "
-                        "CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT");
-            // This signals an error to the caller (see above).
-            _globalSize[0] = 0;
-            return;
-        }
+  for (i = 0; i < MAX_DIMS; i++) {
+    _enqueuedLocalSize[i] = 1;
+  }
-        // If non-uniform work-groups are not supported round up the global
-        // sizes so workgroups are uniform and we have at least one.
-        if (CL_FALSE == are_non_uniform_sub_groups_supported)
-        {
-            log_info(
-                "WARNING: Non-uniform work-groups are not supported on this "
-                "device.\n Running test with uniform work-groups.\n");
-            for (unsigned dim = 0; dim < dims; ++dim)
-            {
-                auto global_size_before = globalSize[dim];
-                auto global_size_rounded = global_size_before
-                    + (localSize[dim] - global_size_before % localSize[dim]);
-                globalSize[dim] = global_size_rounded;
-                log_info("Rounding globalSize[%d] = %d -> %d\n", dim,
-                         global_size_before, global_size_rounded);
-            }
-        }
+  if (localSize) {
+    _localSize_IsNull = false;
+    for (i = 0; i < _dims; i++) {
+      _enqueuedLocalSize[i] = _localSize[i];
+  }
-    cl_uint i;
-    _globalWorkOffset_IsNull = true;
-    _localSize_IsNull = true;
-    setGlobalWorkgroupSize(globalSize);
-    setLocalWorkgroupSize(globalSize, localSize);
-    for (i = _dims; i < MAX_DIMS; i++)
-    {
-        _globalSize[i] = 1;
+  if (reqdWorkGroupSize) {
+    for (i = 0; i < _dims; i++) {
+      _reqdWorkGroupSize[i] = reqdWorkGroupSize[i];
-    for (i = 0; i < MAX_DIMS; i++)
-    {
-        _globalWorkOffset[i] = 0;
+    for (i = _dims; i < MAX_DIMS; i++) {
+      _reqdWorkGroupSize[i] = 1;
+  } else {
+    _reqdWorkGroupSize[0] = 0;
+    _reqdWorkGroupSize[1] = 0;
+    _reqdWorkGroupSize[2] = 0;
+  }
-    if (globalWorkOffset)
-    {
-        _globalWorkOffset_IsNull = false;
-        for (i = 0; i < _dims; i++)
-        {
-            _globalWorkOffset[i] = globalWorkOffset[i];
-        }
-    }
+  _testRange = Range::ALL;
-    for (i = 0; i < MAX_DIMS; i++)
-    {
-        _enqueuedLocalSize[i] = 1;
-    }
+  _numOfGlobalWorkItems = _globalSize[0]*_globalSize[1]*_globalSize[2];
-    if (localSize)
-    {
-        _localSize_IsNull = false;
-        for (i = 0; i < _dims; i++)
-        {
-            _enqueuedLocalSize[i] = _localSize[i];
-        }
-    }
+  DataContainerAttrib temp = {{0, 0, 0}};
-    if (reqdWorkGroupSize)
-    {
-        for (i = 0; i < _dims; i++)
-        {
-            _reqdWorkGroupSize[i] = reqdWorkGroupSize[i];
-        }
-        for (i = _dims; i < MAX_DIMS; i++)
-        {
-            _reqdWorkGroupSize[i] = 1;
-        }
-    }
-    else
-    {
-        _reqdWorkGroupSize[0] = 0;
-        _reqdWorkGroupSize[1] = 0;
-        _reqdWorkGroupSize[2] = 0;
-    }
+  // array with results from each region
+  _resultsRegionArray.resize(NUMBER_OF_REGIONS, temp);
+  _referenceRegionArray.resize(NUMBER_OF_REGIONS, temp);
-    _testRange = Range::ALL;
-    _numOfGlobalWorkItems = _globalSize[0] * _globalSize[1] * _globalSize[2];
-    DataContainerAttrib temp = { { 0, 0, 0 } };
-    // array with results from each region
-    _resultsRegionArray.resize(NUMBER_OF_REGIONS, temp);
-    _referenceRegionArray.resize(NUMBER_OF_REGIONS, temp);
 TestNonUniformWorkGroup::~TestNonUniformWorkGroup () {
@@ -541,7 +482,7 @@
   if(_localSize_IsNull == false)
-  std::string buildOptions{};
+  std::string buildOptions = BUILD_CL_STD_2_0;
   if(_reqdWorkGroupSize[0] != 0 && _reqdWorkGroupSize[1] != 0 && _reqdWorkGroupSize[2] != 0) {
     std::ostringstream tmp(" ");
     tmp << " -D RWGSX=" << _reqdWorkGroupSize[0]
@@ -780,50 +721,42 @@
   return 0;
-void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims,
-                                                 size_t *globalSize,
-                                                 const size_t *localSize,
-                                                 int range)
-    runTestNonUniformWorkGroup(dims, globalSize, localSize, NULL, NULL, range);
+void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims, const size_t *globalSize,
+  const size_t *localSize, int range) {
+  runTestNonUniformWorkGroup (dims, globalSize, localSize, NULL, NULL, range);
-void SubTestExecutor::runTestNonUniformWorkGroup(
-    const cl_uint dims, size_t *globalSize, const size_t *localSize,
-    const size_t *globalWorkOffset, const size_t *reqdWorkGroupSize, int range)
+void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims, const size_t *globalSize,
+  const size_t *localSize, const size_t *globalWorkOffset,
+  const size_t *reqdWorkGroupSize, int range) {
-    int err;
-    ++_overallCounter;
-    TestNonUniformWorkGroup test(_device, _context, _queue, dims, globalSize,
-                                 localSize, NULL, globalWorkOffset,
-                                 reqdWorkGroupSize);
+  int err;
+  ++_overallCounter;
+  TestNonUniformWorkGroup test (_device, _context, _queue, dims, globalSize, localSize,
+    NULL, globalWorkOffset, reqdWorkGroupSize);
-    test.setTestRange(range);
-    err = test.prepareDevice();
-    if (err)
-    {
-        log_error("Error: prepare device\n");
-        ++_failCounter;
-        return;
-    }
+  test.setTestRange(range);
+  err = test.prepareDevice();
+  if (err) {
+    log_error ("Error: prepare device\n");
+    ++_failCounter;
+    return;
+  }
-    err = test.runKernel();
-    if (err)
-    {
-        log_error("Error: run kernel\n");
-        ++_failCounter;
-        return;
-    }
+  err = test.runKernel();
+  if (err) {
+    log_error ("Error: run kernel\n");
+    ++_failCounter;
+    return;
+  }
-    err = test.verifyResults();
-    if (err)
-    {
-        log_error("Error: verify results\n");
-        ++_failCounter;
-        return;
-    }
+  err = test.verifyResults();
+  if (err) {
+    log_error ("Error: verify results\n");
+    ++_failCounter;
+    return;
+  }
 int SubTestExecutor::calculateWorkGroupSize(size_t &maxWgSize, int testRange) {
@@ -831,7 +764,7 @@
   clProgramWrapper program;
   clKernelWrapper testKernel;
-  std::string buildOptions{};
+  std::string buildOptions = BUILD_CL_STD_2_0;
   if (testRange & Range::BASIC)
     buildOptions += " -D TESTBASIC";
diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h
index 414d100..e0c635a 100644
--- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h
+++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h
@@ -25,6 +25,8 @@
+#define BUILD_CL_STD_2_0 "-cl-std=CL2.0"
 #define MAX_DIMS 3
 // This structure reflects data received from kernel.
@@ -60,21 +62,18 @@
 // Main class responsible for testing
 class TestNonUniformWorkGroup {
-    TestNonUniformWorkGroup(const cl_device_id &device,
-                            const cl_context &context,
-                            const cl_command_queue &queue, const cl_uint dims,
-                            size_t *globalSize, const size_t *localSize,
-                            const size_t *buffersSize,
-                            const size_t *globalWorkOffset,
-                            const size_t *reqdWorkGroupSize = NULL);
-    ~TestNonUniformWorkGroup();
+  TestNonUniformWorkGroup (const cl_device_id &device, const cl_context &context,
+    const cl_command_queue &queue, const cl_uint dims, const size_t *globalSize,
+    const size_t *localSize, const size_t *buffersSize, const size_t *globalWorkOffset,
+    const size_t *reqdWorkGroupSize=NULL);
-    static size_t getMaxLocalWorkgroupSize(const cl_device_id &device);
-    static void setMaxLocalWorkgroupSize(size_t workGroupSize)
-    {
-        TestNonUniformWorkGroup::_maxLocalWorkgroupSize = workGroupSize;
-    }
+  ~TestNonUniformWorkGroup ();
+  static size_t getMaxLocalWorkgroupSize (const cl_device_id &device);
+  static void setMaxLocalWorkgroupSize (size_t workGroupSize) {
+    TestNonUniformWorkGroup::_maxLocalWorkgroupSize = workGroupSize;
+  }
   static void enableStrictMode (bool state);
   void setTestRange (int range) {_testRange = range;}
@@ -127,13 +126,12 @@
   SubTestExecutor(const cl_device_id &device, const cl_context &context, const cl_command_queue &queue)
     : _device (device), _context (context), _queue (queue), _failCounter (0), _overallCounter (0) {}
-  void runTestNonUniformWorkGroup(const cl_uint dims, size_t *globalSize,
-                                  const size_t *localSize, int range);
+  void runTestNonUniformWorkGroup (const cl_uint dims, const size_t *globalSize,
+    const size_t *localSize, int range);
-  void runTestNonUniformWorkGroup(const cl_uint dims, size_t *globalSize,
-                                  const size_t *localSize,
-                                  const size_t *globalWorkOffset,
-                                  const size_t *reqdWorkGroupSize, int range);
+  void runTestNonUniformWorkGroup (const cl_uint dims, const size_t *globalSize,
+    const size_t *localSize, const size_t *globalWorkOffset,
+    const size_t *reqdWorkGroupSize, int range);
   int calculateWorkGroupSize(size_t &maxWgSize, int testRange);
   int status();
diff --git a/test_conformance/non_uniform_work_group/main.cpp b/test_conformance/non_uniform_work_group/main.cpp
index 64eff96..e448afe 100644
--- a/test_conformance/non_uniform_work_group/main.cpp
+++ b/test_conformance/non_uniform_work_group/main.cpp
@@ -43,9 +43,7 @@
     auto expected_min_version = Version(2, 0);
     if (version < expected_min_version)
-        version_expected_info("Test", "OpenCL",
-                              expected_min_version.to_string().c_str(),
-                              version.to_string().c_str());
+        version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
         return TEST_SKIP;
     return TEST_PASS;
diff --git a/test_conformance/opencl_conformance_tests_12_conversions.csv b/test_conformance/opencl_conformance_tests_12_conversions.csv
new file mode 100644
index 0000000..c8e283a
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_12_conversions.csv
@@ -0,0 +1,4 @@
+# OpenCL Conformance Test Suite (conversions only)
diff --git a/test_conformance/opencl_conformance_tests_12_d3d.csv b/test_conformance/opencl_conformance_tests_12_d3d.csv
new file mode 100644
index 0000000..53466eb
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_12_d3d.csv
@@ -0,0 +1,5 @@
+# OpenCL Conformance Test for DirectX interop
diff --git a/test_conformance/opencl_conformance_tests_12_full.csv b/test_conformance/opencl_conformance_tests_12_full.csv
new file mode 100644
index 0000000..083fcfa
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_12_full.csv
@@ -0,0 +1,82 @@
+# OpenCL Conformance Test Suite (full version)
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+# #########################################
+# Basic operation tests
+# #########################################
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns
+Geometric Functions,geometrics/test_geometrics
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions full*
+Multiple Device/Context,multiple_device_context/test_multiples
+Allocations (single maximum),allocations/test_allocations single 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+VecAlign, vec_align/test_vecalign
+VecStep, vec_step/test_vecstep
+Device Partitioning,device_partition/test_device_partition
+# #########################################
+# Buffers and images
+# #########################################
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
+Images (clCopyImage),images/clCopyImage/test_cl_copy_images
+Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Integer Ops,integer_ops/test_integer_ops
+Half Ops,half/test_half
diff --git a/test_conformance/opencl_conformance_tests_12_full_no_math_or_conversions.csv b/test_conformance/opencl_conformance_tests_12_full_no_math_or_conversions.csv
new file mode 100644
index 0000000..2f8d653
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_12_full_no_math_or_conversions.csv
@@ -0,0 +1,79 @@
+# OpenCL Conformance Test Suite (full version)
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+# #########################################
+# Basic operation tests
+# #########################################
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns
+Geometric Functions,geometrics/test_geometrics
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions full*
+Multiple Device/Context,multiple_device_context/test_multiples
+Allocations (single maximum),allocations/test_allocations single 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+VecAlign, vec_align/test_vecalign
+VecStep, vec_step/test_vecstep
+# #########################################
+# Buffers and images
+# #########################################
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
+Images (clCopyImage),images/clCopyImage/test_cl_copy_images
+Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Integer Ops,integer_ops/test_integer_ops
+Half Ops,half/test_half
diff --git a/test_conformance/opencl_conformance_tests_12_math.csv b/test_conformance/opencl_conformance_tests_12_math.csv
new file mode 100644
index 0000000..e033190
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_12_math.csv
@@ -0,0 +1,4 @@
+# OpenCL Conformance Test Suite (math only)
diff --git a/test_conformance/opencl_conformance_tests_12_quick.csv b/test_conformance/opencl_conformance_tests_12_quick.csv
new file mode 100644
index 0000000..af59165
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_12_quick.csv
@@ -0,0 +1,81 @@
+# OpenCL Conformance Test Suite (quick version)
+# The quick version skips some long-running image tests, runs a shorter math test,
+# and does not run the conversion test.
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+# #########################################
+# Basic operation tests
+# #########################################
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns
+Geometric Functions,geometrics/test_geometrics
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions quick*
+#Multiple Device/Context,multiple_device_context/test_multiples
+Allocations (single maximum),allocations/test_allocations single 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+Device Partitioning,device_partition/test_device_partition
+# #########################################
+# Buffers and images
+# #########################################
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
+Images (clCopyImage),images/clCopyImage/test_cl_copy_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Math,math_brute_force/test_bruteforce -w
+Integer Ops,integer_ops/test_integer_ops integer_* quick_*
+Half Ops,half/test_half -w
diff --git a/test_conformance/opencl_conformance_tests_20_full.csv b/test_conformance/opencl_conformance_tests_20_full.csv
new file mode 100644
index 0000000..e1c0ecd
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_20_full.csv
@@ -0,0 +1,102 @@
+# OpenCL Conformance Test Suite (full version)
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+# #########################################
+# Basic operation tests
+# #########################################
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns
+Geometric Functions,geometrics/test_geometrics
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions full*
+Multiple Device/Context,multiple_device_context/test_multiples
+Allocations (single maximum),allocations/test_allocations single 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+VecAlign, vec_align/test_vecalign
+VecStep, vec_step/test_vecstep
+Device Partitioning,device_partition/test_device_partition
+# #########################################
+# Buffers and images
+# #########################################
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
+Images (clCopyImage),images/clCopyImage/test_cl_copy_images
+Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Integer Ops,integer_ops/test_integer_ops
+Half Ops,half/test_half
+# OpenCL 2.0 tests
+C11 Atomics,c11_atomics/test_c11_atomics
+Execution Model,device_execution/test_device_execution
+Generic Address Space,generic_address_space/test_generic_address_space
+Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group
+# Extensions
+Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST
+Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps
+Mipmaps (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images test_mipmaps
diff --git a/test_conformance/opencl_conformance_tests_20_full_no_math_or_conversions.csv b/test_conformance/opencl_conformance_tests_20_full_no_math_or_conversions.csv
new file mode 100644
index 0000000..2f8d653
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_20_full_no_math_or_conversions.csv
@@ -0,0 +1,79 @@
+# OpenCL Conformance Test Suite (full version)
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+# #########################################
+# Basic operation tests
+# #########################################
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns
+Geometric Functions,geometrics/test_geometrics
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions full*
+Multiple Device/Context,multiple_device_context/test_multiples
+Allocations (single maximum),allocations/test_allocations single 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+VecAlign, vec_align/test_vecalign
+VecStep, vec_step/test_vecstep
+# #########################################
+# Buffers and images
+# #########################################
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
+Images (clCopyImage),images/clCopyImage/test_cl_copy_images
+Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Integer Ops,integer_ops/test_integer_ops
+Half Ops,half/test_half
diff --git a/test_conformance/opencl_conformance_tests_20_quick.csv b/test_conformance/opencl_conformance_tests_20_quick.csv
new file mode 100644
index 0000000..af59165
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_20_quick.csv
@@ -0,0 +1,81 @@
+# OpenCL Conformance Test Suite (quick version)
+# The quick version skips some long-running image tests, runs a shorter math test,
+# and does not run the conversion test.
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+# #########################################
+# Basic operation tests
+# #########################################
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns
+Geometric Functions,geometrics/test_geometrics
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions quick*
+#Multiple Device/Context,multiple_device_context/test_multiples
+Allocations (single maximum),allocations/test_allocations single 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+Device Partitioning,device_partition/test_device_partition
+# #########################################
+# Buffers and images
+# #########################################
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
+Images (clCopyImage),images/clCopyImage/test_cl_copy_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Math,math_brute_force/test_bruteforce -w
+Integer Ops,integer_ops/test_integer_ops integer_* quick_*
+Half Ops,half/test_half -w
diff --git a/test_conformance/opencl_conformance_tests_21_full_spirv.csv b/test_conformance/opencl_conformance_tests_21_full_spirv.csv
new file mode 100644
index 0000000..4b84596
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_21_full_spirv.csv
@@ -0,0 +1,108 @@
+# OpenCL Conformance Test Suite (full version)
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+# #########################################
+# Basic operation tests
+# #########################################
+Basic,basic/test_basic --compilation-mode spir-v --compilation-cache-path .
+API,api/test_api --compilation-mode spir-v --compilation-cache-path .
+Compiler,compiler/test_compiler --compilation-mode spir-v --compilation-cache-path .
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns --compilation-mode spir-v --compilation-cache-path .
+Geometric Functions,geometrics/test_geometrics --compilation-mode spir-v --compilation-cache-path .
+Relationals,relationals/test_relationals --compilation-mode spir-v --compilation-cache-path .
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions full* --compilation-mode spir-v --compilation-cache-path .
+Multiple Device/Context,multiple_device_context/test_multiples --compilation-mode spir-v --compilation-cache-path .
+Atomics,atomics/test_atomics --compilation-mode spir-v --compilation-cache-path .
+Profiling,profiling/test_profiling --compilation-mode spir-v --compilation-cache-path .
+Events,events/test_events --compilation-mode spir-v --compilation-cache-path .
+Allocations (single maximum),allocations/test_allocations single 5 all --compilation-mode spir-v --compilation-cache-path .
+Allocations (total maximum),allocations/test_allocations multiple 5 all --compilation-mode spir-v --compilation-cache-path .
+VecAlign, vec_align/test_vecalign --compilation-mode spir-v --compilation-cache-path .
+VecStep, vec_step/test_vecstep --compilation-mode spir-v --compilation-cache-path .
+Printf,printf/test_printf --compilation-mode spir-v --compilation-cache-path .
+Device Partitioning,device_partition/test_device_partition --compilation-mode spir-v --compilation-cache-path .
+# #########################################
+# Buffers and images
+# #########################################
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Buffers,buffers/test_buffers --compilation-mode spir-v --compilation-cache-path .
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods --compilation-mode spir-v --compilation-cache-path .
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
+Images (clCopyImage),images/clCopyImage/test_cl_copy_images
+Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads --compilation-mode spir-v --compilation-cache-path .
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches --compilation-mode spir-v --compilation-cache-path .
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images --compilation-mode spir-v --compilation-cache-path .
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR --compilation-mode spir-v --compilation-cache-path .
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR --compilation-mode spir-v --compilation-cache-path .
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR --compilation-mode spir-v --compilation-cache-path .
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl --compilation-mode spir-v --compilation-cache-path .
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Select,select/test_select --compilation-mode spir-v --compilation-cache-path .
+Conversions,conversions/test_conversions --compilation-mode spir-v --compilation-cache-path .
+Contractions,contractions/test_contractions --compilation-mode spir-v --compilation-cache-path .
+Math,math_brute_force/test_bruteforce --compilation-mode spir-v --compilation-cache-path .
+Integer Ops,integer_ops/test_integer_ops --compilation-mode spir-v --compilation-cache-path .
+Half Ops,half/test_half --compilation-mode spir-v --compilation-cache-path .
+# OpenCL 2.0 tests
+C11 Atomics,c11_atomics/test_c11_atomics --compilation-mode spir-v --compilation-cache-path .
+Execution Model,device_execution/test_device_execution --compilation-mode spir-v --compilation-cache-path .
+Generic Address Space,generic_address_space/test_generic_address_space --compilation-mode spir-v --compilation-cache-path .
+Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group --compilation-mode spir-v --compilation-cache-path .
+Pipes,pipes/test_pipes --compilation-mode spir-v --compilation-cache-path .
+SVM,SVM/test_svm --compilation-mode spir-v --compilation-cache-path .
+Workgroups,workgroups/test_workgroups --compilation-mode spir-v --compilation-cache-path .
+# OpenCL 2.1 tests
+Device timer,device_timer/test_device_timer
+SPIRV new,spirv_new/test_spirv_new -ILPath spirv_bin
+# Extensions
+Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
+Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps --compilation-mode spir-v --compilation-cache-path .
+Mipmaps (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images test_mipmaps --compilation-mode spir-v --compilation-cache-path .
+Subgroups,subgroups/test_subgroups --compilation-mode spir-v --compilation-cache-path .
diff --git a/test_conformance/opencl_conformance_tests_21_legacy_wimpy.csv b/test_conformance/opencl_conformance_tests_21_legacy_wimpy.csv
new file mode 100644
index 0000000..bbde183
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_21_legacy_wimpy.csv
@@ -0,0 +1,100 @@
+# OpenCL Conformance Test Suite (quick version)
+# The quick version skips some long-running image tests, runs a shorter math test,
+# and does not run the conversion test.
+# #########################################
+# Basic Information on the compute device
+# #########################################
+Compute Info,computeinfo/test_computeinfo
+# #########################################
+# Basic operation tests
+# #########################################
+# #########################################
+# Common mathematical functions
+# #########################################
+Common Functions,commonfns/test_commonfns
+Geometric Functions,geometrics/test_geometrics
+# #########################################
+# General operation
+# #########################################
+Thread Dimensions,thread_dimensions/test_thread_dimensions quick*
+Multiple Device/Context,multiple_device_context/test_multiples
+Allocations (single maximum),allocations/test_allocations single 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+VecAlign, vec_align/test_vecalign
+VecStep, vec_step/test_vecstep
+Device Partitioning,device_partition/test_device_partition
+# #########################################
+# Buffers and images
+# #########################################
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
+Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
+Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
+Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
+Mem (Host Flags),mem_host_flags/test_mem_host_flags
+# #########################################
+# CPU is required to pass linear and normalized image filtering
+# #########################################
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR
+# #########################################
+# OpenGL/CL interaction
+# #########################################
+OpenCL-GL Sharing,gl/test_gl
+# #########################################
+# Thorough math and conversions tests
+# #########################################
+Conversions,conversions/test_conversions -w
+Math,math_brute_force/test_bruteforce -w
+Integer Ops,integer_ops/test_integer_ops integer_* quick_*
+Half Ops,half/test_half -w
+# OpenCL 2.0 tests
+C11 Atomics,c11_atomics/test_c11_atomics
+Execution Model,device_execution/test_device_execution
+Generic Address Space,generic_address_space/test_generic_address_space
+Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group
+# OpenCL 2.1 tests
+Device timer,device_timer/test_device_timer
+SPIRV new,spirv_new/test_spirv_new -ILPath spirv_bin
+# Extensions
+Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST
+Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps
+Mipmaps (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images test_mipmaps
diff --git a/test_conformance/opencl_conformance_tests_22.csv b/test_conformance/opencl_conformance_tests_22.csv
new file mode 100644
index 0000000..2ef864a
--- /dev/null
+++ b/test_conformance/opencl_conformance_tests_22.csv
@@ -0,0 +1,45 @@
+# OpenCL Conformance Test Suite (2.2 version)
+# #########################################
+# New API features
+# #########################################
+API (ctors and dtors of global scope vars) , clcpp/api/test_cpp_api "test_global_scope*"
+API (specialization constants)             , clcpp/api/test_cpp_api "test_spec_consts*"
+# #########################################
+# New representation of types
+# #########################################
+Images and samplers                        , clcpp/images/test_cpp_images
+Pipes and reservations                     , clcpp/pipes/test_cpp_pipes "test_pipes_pipe"
+Device enqueue and events                  , clcpp/device_queue/test_cpp_device_queue
+Address spaces                             , clcpp/address_spaces/test_cpp_address_spaces
+# #########################################
+# New representation of functions
+# #########################################
+Conversions (convert_cast)                 , clcpp/convert/test_cpp_convert
+Reinterpreting (as_type)                   , clcpp/reinterpret/test_cpp_reinterpret
+Atomics                                    , clcpp/atomics/test_cpp_atomics
+Work-item functions                        , clcpp/workitems/test_cpp_workitems
+Work-group functions                       , clcpp/workgroups/test_cpp_workgroups
+Sub-group functions                        , clcpp/subgroups/test_cpp_subgroups
+Synchronization functions                  , clcpp/synchronization/test_cpp_synchronization "test_work_group_barrier*" "test_sub_group_barrier*"
+Math functions                             , clcpp/math_funcs/test_cpp_math_funcs
+Integer functions                          , clcpp/integer_funcs/test_cpp_integer_funcs
+Common functions                           , clcpp/common_funcs/test_cpp_common_funcs
+Geometric functions                        , clcpp/geometric_funcs/test_cpp_geometric_funcs
+Relational functions                       , clcpp/relational_funcs/test_cpp_relational_funcs
+vload and vstore functions                 , clcpp/vload_vstore/test_cpp_vload_vstore_funcs
+# #########################################
+# New in OpenCL C++
+# #########################################
+Specialization constants                   , clcpp/spec_constants/test_cpp_spec_constants
+Named barriers (KHR extension)             , clcpp/synchronization/test_cpp_synchronization "test_work_group_named_barrier*"
+required_num_sub_groups attribute          , clcpp/attributes/test_cpp_attributes "test_required_num_sub_groups*"
+ivdep attribute                            , clcpp/attributes/test_cpp_attributes "test_ivdep*"
+max_size attribute                         , clcpp/attributes/test_cpp_attributes "test_max_size*"
+Ctors and dtors of global scope objects    , clcpp/program_scope_ctors_dtors/test_cpp_program_scope_ctors_dtors
+Pipe storages                              , clcpp/pipes/test_cpp_pipes "test_pipes_pipe_storage"
diff --git a/test_conformance/opencl_conformance_tests_full.csv b/test_conformance/opencl_conformance_tests_full.csv
index 2b0dc8a..0555527 100644
--- a/test_conformance/opencl_conformance_tests_full.csv
+++ b/test_conformance/opencl_conformance_tests_full.csv
@@ -30,8 +30,9 @@
 Allocations (single maximum),allocations/test_allocations single 5 all
-Allocations (total maximum),allocations/test_allocations multiple 5 all
-Vectors, vectors/test_vectors
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+VecAlign, vec_align/test_vecalign
+VecStep, vec_step/test_vecstep
 Device Partitioning,device_partition/test_device_partition
@@ -39,23 +40,23 @@
 # Buffers and images
 # #########################################
-Images (API Info),images/clGetInfo/test_cl_get_info
-Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
 Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
 Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
 Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
 Images (clCopyImage),images/clCopyImage/test_cl_copy_images
 Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
-Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images
-Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images
-Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches
-Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images
-Images (clFillImage),images/clFillImage/test_cl_fill_images
-Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches
-Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images
-Images (Samplerless),images/samplerlessReads/test_samplerless_reads
-Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches
-Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
 Mem (Host Flags),mem_host_flags/test_mem_host_flags
 # #########################################
@@ -95,7 +96,7 @@
 # OpenCL 2.1 tests
 Device timer,device_timer/test_device_timer
-SPIRV new,spirv_new/test_spirv_new --spirv-binaries-path spirv_bin
+SPIRV new,spirv_new/test_spirv_new -ILPath spirv_bin
 # Extensions
diff --git a/test_conformance/opencl_conformance_tests_full_binary.csv b/test_conformance/opencl_conformance_tests_full_binary.csv
deleted file mode 100644
index 348f32e..0000000
--- a/test_conformance/opencl_conformance_tests_full_binary.csv
+++ /dev/null
@@ -1,107 +0,0 @@
-# OpenCL Conformance Test Suite (full version)
-# #########################################
-# Basic Information on the compute device
-# #########################################
-Compute Info,computeinfo/test_computeinfo
-# #########################################
-# Basic operation tests
-# #########################################
-Basic,basic/test_basic --compilation-mode binary --compilation-cache-path .
-API,api/test_api --compilation-mode binary --compilation-cache-path .
-Compiler,compiler/test_compiler --compilation-mode binary --compilation-cache-path .
-# #########################################
-# Common mathematical functions
-# #########################################
-Common Functions,commonfns/test_commonfns --compilation-mode binary --compilation-cache-path .
-Geometric Functions,geometrics/test_geometrics --compilation-mode binary --compilation-cache-path .
-Relationals,relationals/test_relationals --compilation-mode binary --compilation-cache-path .
-# #########################################
-# General operation
-# #########################################
-Thread Dimensions,thread_dimensions/test_thread_dimensions full* --compilation-mode binary --compilation-cache-path .
-Multiple Device/Context,multiple_device_context/test_multiples --compilation-mode binary --compilation-cache-path .
-Atomics,atomics/test_atomics --compilation-mode binary --compilation-cache-path .
-Profiling,profiling/test_profiling --compilation-mode binary --compilation-cache-path .
-Events,events/test_events --compilation-mode binary --compilation-cache-path .
-Allocations (single maximum),allocations/test_allocations single 5 all --compilation-mode binary --compilation-cache-path .
-Allocations (total maximum),allocations/test_allocations multiple 5 all --compilation-mode binary --compilation-cache-path .
-Vectors, vectors/test_vectors --compilation-mode binary --compilation-cache-path .
-Printf,printf/test_printf --compilation-mode binary --compilation-cache-path .
-Device Partitioning,device_partition/test_device_partition --compilation-mode binary --compilation-cache-path .
-# #########################################
-# Buffers and images
-# #########################################
-Images (API Info),images/clGetInfo/test_cl_get_info
-Buffers,buffers/test_buffers --compilation-mode binary --compilation-cache-path .
-Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods --compilation-mode binary --compilation-cache-path .
-Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST --compilation-mode binary --compilation-cache-path .
-Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST --compilation-mode binary --compilation-cache-path .
-Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST --compilation-mode binary --compilation-cache-path .
-Images (clCopyImage),images/clCopyImage/test_cl_copy_images
-Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
-Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images
-Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images
-Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches
-Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images
-Images (clFillImage),images/clFillImage/test_cl_fill_images
-Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches
-Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images
-Images (Samplerless),images/samplerlessReads/test_samplerless_reads --compilation-mode binary --compilation-cache-path .
-Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches --compilation-mode binary --compilation-cache-path .
-Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images --compilation-mode binary --compilation-cache-path .
-Mem (Host Flags),mem_host_flags/test_mem_host_flags
-# #########################################
-# CPU is required to pass linear and normalized image filtering
-# #########################################
-CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR --compilation-mode binary --compilation-cache-path .
-CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR --compilation-mode binary --compilation-cache-path .
-CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR --compilation-mode binary --compilation-cache-path .
-# #########################################
-# OpenGL/CL interaction
-# #########################################
-OpenCL-GL Sharing,gl/test_gl --compilation-mode binary --compilation-cache-path .
-# #########################################
-# Thorough math and conversions tests
-# #########################################
-Select,select/test_select --compilation-mode binary --compilation-cache-path .
-Conversions,conversions/test_conversions --compilation-mode binary --compilation-cache-path .
-Contractions,contractions/test_contractions --compilation-mode binary --compilation-cache-path .
-Math,math_brute_force/test_bruteforce --compilation-mode binary --compilation-cache-path .
-Integer Ops,integer_ops/test_integer_ops --compilation-mode binary --compilation-cache-path .
-Half Ops,half/test_half --compilation-mode binary --compilation-cache-path .
-# OpenCL 2.0 tests
-C11 Atomics,c11_atomics/test_c11_atomics --compilation-mode binary --compilation-cache-path .
-Execution Model,device_execution/test_device_execution --compilation-mode binary --compilation-cache-path .
-Generic Address Space,generic_address_space/test_generic_address_space --compilation-mode binary --compilation-cache-path .
-Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group --compilation-mode binary --compilation-cache-path .
-Pipes,pipes/test_pipes --compilation-mode binary --compilation-cache-path .
-SVM,SVM/test_svm --compilation-mode binary --compilation-cache-path .
-Workgroups,workgroups/test_workgroups --compilation-mode binary --compilation-cache-path .
-# OpenCL 2.1 tests
-Device timer,device_timer/test_device_timer
-SPIRV new,spirv_new/test_spirv_new --spirv-binaries-path spirv_bin
-# Extensions
-Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST --compilation-mode binary --compilation-cache-path .
-Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps --compilation-mode binary --compilation-cache-path .
-Mipmaps (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images test_mipmaps --compilation-mode binary --compilation-cache-path .
-Subgroups,subgroups/test_subgroups --compilation-mode binary --compilation-cache-path .
diff --git a/test_conformance/opencl_conformance_tests_full_no_math_or_conversions.csv b/test_conformance/opencl_conformance_tests_full_no_math_or_conversions.csv
index 89d4490..ecc1314 100644
--- a/test_conformance/opencl_conformance_tests_full_no_math_or_conversions.csv
+++ b/test_conformance/opencl_conformance_tests_full_no_math_or_conversions.csv
@@ -30,8 +30,9 @@
 Allocations (single maximum),allocations/test_allocations single 5 all
-Allocations (total maximum),allocations/test_allocations multiple 5 all
-Vectors, vectors/test_vectors
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
+VecAlign, vec_align/test_vecalign
+VecStep, vec_step/test_vecstep
 Device Partitioning,device_partition/test_device_partition
@@ -39,23 +40,23 @@
 # Buffers and images
 # #########################################
-Images (API Info),images/clGetInfo/test_cl_get_info
-Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
 Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
 Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
 Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
 Images (clCopyImage),images/clCopyImage/test_cl_copy_images
 Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
-Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images
-Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images
-Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches
-Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images
-Images (clFillImage),images/clFillImage/test_cl_fill_images
-Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches
-Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images
-Images (Samplerless),images/samplerlessReads/test_samplerless_reads
-Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches
-Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
 Mem (Host Flags),mem_host_flags/test_mem_host_flags
 # #########################################
@@ -93,7 +94,7 @@
 # OpenCL 2.1 tests
 Device timer,device_timer/test_device_timer
-SPIRV new,spirv_new/test_spirv_new --spirv-binaries-path spirv_bin
+SPIRV new,spirv_new/test_spirv_new -ILPath spirv_bin
 # Extensions
diff --git a/test_conformance/opencl_conformance_tests_full_spirv.csv b/test_conformance/opencl_conformance_tests_full_spirv.csv
deleted file mode 100644
index 24b4186..0000000
--- a/test_conformance/opencl_conformance_tests_full_spirv.csv
+++ /dev/null
@@ -1,107 +0,0 @@
-# OpenCL Conformance Test Suite (full version)
-# #########################################
-# Basic Information on the compute device
-# #########################################
-Compute Info,computeinfo/test_computeinfo
-# #########################################
-# Basic operation tests
-# #########################################
-Basic,basic/test_basic --compilation-mode spir-v --compilation-cache-path .
-API,api/test_api --compilation-mode spir-v --compilation-cache-path .
-Compiler,compiler/test_compiler --compilation-mode spir-v --compilation-cache-path .
-# #########################################
-# Common mathematical functions
-# #########################################
-Common Functions,commonfns/test_commonfns --compilation-mode spir-v --compilation-cache-path .
-Geometric Functions,geometrics/test_geometrics --compilation-mode spir-v --compilation-cache-path .
-Relationals,relationals/test_relationals --compilation-mode spir-v --compilation-cache-path .
-# #########################################
-# General operation
-# #########################################
-Thread Dimensions,thread_dimensions/test_thread_dimensions full* --compilation-mode spir-v --compilation-cache-path .
-Multiple Device/Context,multiple_device_context/test_multiples --compilation-mode spir-v --compilation-cache-path .
-Atomics,atomics/test_atomics --compilation-mode spir-v --compilation-cache-path .
-Profiling,profiling/test_profiling --compilation-mode spir-v --compilation-cache-path .
-Events,events/test_events --compilation-mode spir-v --compilation-cache-path .
-Allocations (single maximum),allocations/test_allocations single 5 all --compilation-mode spir-v --compilation-cache-path .
-Allocations (total maximum),allocations/test_allocations multiple 5 all --compilation-mode spir-v --compilation-cache-path .
-Vectors, vectors/test_vectors --compilation-mode spir-v --compilation-cache-path .
-Printf,printf/test_printf --compilation-mode spir-v --compilation-cache-path .
-Device Partitioning,device_partition/test_device_partition --compilation-mode spir-v --compilation-cache-path .
-# #########################################
-# Buffers and images
-# #########################################
-Images (API Info),images/clGetInfo/test_cl_get_info 
-Buffers,buffers/test_buffers --compilation-mode spir-v --compilation-cache-path .
-Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods --compilation-mode spir-v --compilation-cache-path .
-Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
-Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
-Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
-Images (clCopyImage),images/clCopyImage/test_cl_copy_images
-Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images
-Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
-Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
-Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
-Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
-Images (clFillImage),images/clFillImage/test_cl_fill_images 
-Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
-Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
-Images (Samplerless),images/samplerlessReads/test_samplerless_reads --compilation-mode spir-v --compilation-cache-path .
-Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches --compilation-mode spir-v --compilation-cache-path .
-Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images --compilation-mode spir-v --compilation-cache-path .
-Mem (Host Flags),mem_host_flags/test_mem_host_flags
-# #########################################
-# CPU is required to pass linear and normalized image filtering
-# #########################################
-CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR --compilation-mode spir-v --compilation-cache-path .
-CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR --compilation-mode spir-v --compilation-cache-path .
-CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR --compilation-mode spir-v --compilation-cache-path .
-# #########################################
-# OpenGL/CL interaction
-# #########################################
-OpenCL-GL Sharing,gl/test_gl --compilation-mode spir-v --compilation-cache-path .
-# #########################################
-# Thorough math and conversions tests
-# #########################################
-Select,select/test_select --compilation-mode spir-v --compilation-cache-path .
-Conversions,conversions/test_conversions --compilation-mode spir-v --compilation-cache-path .
-Contractions,contractions/test_contractions --compilation-mode spir-v --compilation-cache-path .
-Math,math_brute_force/test_bruteforce --compilation-mode spir-v --compilation-cache-path .
-Integer Ops,integer_ops/test_integer_ops --compilation-mode spir-v --compilation-cache-path .
-Half Ops,half/test_half --compilation-mode spir-v --compilation-cache-path .
-# OpenCL 2.0 tests
-C11 Atomics,c11_atomics/test_c11_atomics --compilation-mode spir-v --compilation-cache-path .
-Execution Model,device_execution/test_device_execution --compilation-mode spir-v --compilation-cache-path .
-Generic Address Space,generic_address_space/test_generic_address_space --compilation-mode spir-v --compilation-cache-path .
-Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group --compilation-mode spir-v --compilation-cache-path .
-Pipes,pipes/test_pipes --compilation-mode spir-v --compilation-cache-path .
-SVM,SVM/test_svm --compilation-mode spir-v --compilation-cache-path .
-Workgroups,workgroups/test_workgroups --compilation-mode spir-v --compilation-cache-path .
-# OpenCL 2.1 tests
-Device timer,device_timer/test_device_timer
-SPIRV new,spirv_new/test_spirv_new --spirv-binaries-path spirv_bin
-# Extensions
-Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST --compilation-mode spir-v --compilation-cache-path .
-Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps --compilation-mode spir-v --compilation-cache-path .
-Mipmaps (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images test_mipmaps --compilation-mode spir-v --compilation-cache-path .
-Subgroups,subgroups/test_subgroups --compilation-mode spir-v --compilation-cache-path .
diff --git a/test_conformance/opencl_conformance_tests_math.csv b/test_conformance/opencl_conformance_tests_math.csv
index 03fddea..e033190 100644
--- a/test_conformance/opencl_conformance_tests_math.csv
+++ b/test_conformance/opencl_conformance_tests_math.csv
@@ -1,4 +1,4 @@
 # OpenCL Conformance Test Suite (math only)
diff --git a/test_conformance/opencl_conformance_tests_quick.csv b/test_conformance/opencl_conformance_tests_quick.csv
index 043df82..af59165 100644
--- a/test_conformance/opencl_conformance_tests_quick.csv
+++ b/test_conformance/opencl_conformance_tests_quick.csv
@@ -32,7 +32,7 @@
 Allocations (single maximum),allocations/test_allocations single 5 all
-Allocations (total maximum),allocations/test_allocations multiple 5 all
+Allocations (total maximum),allocations/test_allocations multiple 5 all 
 Device Partitioning,device_partition/test_device_partition
@@ -40,22 +40,22 @@
 # Buffers and images
 # #########################################
-Images (API Info),images/clGetInfo/test_cl_get_info
-Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods
+Images (API Info),images/clGetInfo/test_cl_get_info 
+Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods 
 Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST
 Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST
 Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST
 Images (clCopyImage),images/clCopyImage/test_cl_copy_images
-Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images
-Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images
-Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches
-Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images
-Images (clFillImage),images/clFillImage/test_cl_fill_images
-Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches
-Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images
-Images (Samplerless),images/samplerlessReads/test_samplerless_reads
-Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches
-Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images
+Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images 
+Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images 
+Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches 
+Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images 
+Images (clFillImage),images/clFillImage/test_cl_fill_images 
+Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches 
+Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images 
+Images (Samplerless),images/samplerlessReads/test_samplerless_reads 
+Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches 
+Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images 
 Mem (Host Flags),mem_host_flags/test_mem_host_flags
 # #########################################
@@ -79,29 +79,3 @@
 Math,math_brute_force/test_bruteforce -w
 Integer Ops,integer_ops/test_integer_ops integer_* quick_*
 Half Ops,half/test_half -w
-# OpenCL 2.0 tests
-C11 Atomics,c11_atomics/test_c11_atomics
-Execution Model,device_execution/test_device_execution
-Generic Address Space,generic_address_space/test_generic_address_space
-Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group
-# OpenCL 2.1 tests
-Device timer,device_timer/test_device_timer
-SPIRV new,spirv_new/test_spirv_new --spirv-binaries-path spirv_bin
-# Extensions
-Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST
-Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps
-Mipmaps (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images test_mipmaps
diff --git a/test_conformance/pipes/main.cpp b/test_conformance/pipes/main.cpp
index 4241c4d..45b4c28 100644
--- a/test_conformance/pipes/main.cpp
+++ b/test_conformance/pipes/main.cpp
@@ -23,12 +23,9 @@
 test_status InitCL(cl_device_id device) {
   auto version = get_device_cl_version(device);
   auto expected_min_version = Version(2, 0);
-  if (version < expected_min_version)
-  {
-      version_expected_info("Test", "OpenCL",
-                            expected_min_version.to_string().c_str(),
-                            version.to_string().c_str());
-      return TEST_SKIP;
+  if (version < expected_min_version) {
+    version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
+    return TEST_SKIP;
   int error;
diff --git a/test_conformance/pipes/test_pipe_limits.cpp b/test_conformance/pipes/test_pipe_limits.cpp
index 169ab80..85247f8 100644
--- a/test_conformance/pipes/test_pipe_limits.cpp
+++ b/test_conformance/pipes/test_pipe_limits.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -218,8 +218,9 @@
     const char *sources[] = { kernel_source.c_str() };
     // Create producer kernel
-    err = create_single_kernel_helper(context, &program, &kernel[0], 1, sources,
-                                      kernelName[0]);
+    err = create_single_kernel_helper_with_build_options(
+        context, &program, &kernel[0], 1, sources, kernelName[0],
+        "-cl-std=CL2.0");
     test_error_ret(err, " Error creating program", -1);
     //Create consumer kernel
@@ -367,8 +368,9 @@
     const char *sources[] = { kernel_source.c_str() };
     // Create producer kernel
-    err = create_single_kernel_helper(context, &program, &kernel[0], 1, sources,
-                                      kernelName[0]);
+    err = create_single_kernel_helper_with_build_options(
+        context, &program, &kernel[0], 1, sources, kernelName[0],
+        "-cl-std=CL2.0");
     test_error_ret(err, " Error creating program", -1);
     //Create consumer kernel
@@ -531,8 +533,9 @@
     const char *sources[] = { kernel_source.c_str() };
     // Create producer kernel
-    err = create_single_kernel_helper(context, &program, &kernel[0], 1, sources,
-                                      kernelName[0]);
+    err = create_single_kernel_helper_with_build_options(
+        context, &program, &kernel[0], 1, sources, kernelName[0],
+        "-cl-std=CL2.0");
     test_error_ret(err, " Error creating program", -1);
     // Create consumer kernel
diff --git a/test_conformance/pipes/test_pipe_query_functions.cpp b/test_conformance/pipes/test_pipe_query_functions.cpp
index 21d1950..f9c93aa 100644
--- a/test_conformance/pipes/test_pipe_query_functions.cpp
+++ b/test_conformance/pipes/test_pipe_query_functions.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -140,9 +140,7 @@
     test_error_ret(err, " clCreatePipe failed", -1);
     // Create producer kernel
-    err = create_single_kernel_helper(
-        context, &program, &kernel[0], 1,
-        (const char **)&pipe_query_functions_kernel_code, kernelName[0]);
+    err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&pipe_query_functions_kernel_code, kernelName[0], "-cl-std=CL2.0");
     test_error_ret(err, " Error creating program", -1);
     //Create consumer kernel
diff --git a/test_conformance/pipes/test_pipe_read_write.cpp b/test_conformance/pipes/test_pipe_read_write.cpp
index dd0d121..4bb4468 100644
--- a/test_conformance/pipes/test_pipe_read_write.cpp
+++ b/test_conformance/pipes/test_pipe_read_write.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -361,8 +361,8 @@
     int            i;
     int            sum_input = 0, sum_output = 0;
-    cl_half *inptr = (cl_half *)ptr1;
-    cl_half *outptr = (cl_half *)ptr2;
+    cl_ushort    *inptr = (cl_ushort *)ptr1;
+    cl_ushort    *outptr = (cl_ushort *)ptr2;
     for(i = 0; i < n; i++)
@@ -531,8 +531,9 @@
         std::string kernel_source = sourceCode[i].str();
         const char *sources[] = { kernel_source.c_str() };
         // Create producer kernel
-        err = create_single_kernel_helper(context, &program[i], &kernel[ii], 1,
-                                          sources, kernelName[ii]);
+        err = create_single_kernel_helper_with_build_options(
+            context, &program[i], &kernel[ii], 1, sources, kernelName[ii],
+            "-cl-std=CL2.0");
         test_error_ret(err, " Error creating program", -1);
@@ -658,8 +659,7 @@
     test_error_ret(err, " clCreatePipe failed", -1);
     // Create producer kernel
-    err = create_single_kernel_helper(context, &program, &kernel[0], 1,
-                                      &kernelCode, kernelName[0]);
+    err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, &kernelCode, kernelName[0], "-cl-std=CL2.0");
     test_error_ret(err, " Error creating program", -1);
     //Create consumer kernel
diff --git a/test_conformance/pipes/test_pipe_readwrite_errors.cpp b/test_conformance/pipes/test_pipe_readwrite_errors.cpp
index d4b4524..1b9fc31 100644
--- a/test_conformance/pipes/test_pipe_readwrite_errors.cpp
+++ b/test_conformance/pipes/test_pipe_readwrite_errors.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -115,9 +115,7 @@
     test_error_ret(err, " clCreatePipe failed", -1);
     // Create producer kernel
-    err = create_single_kernel_helper(
-        context, &program, &kernel[0], 1,
-        (const char **)&pipe_readwrite_errors_kernel_code, kernelName[0]);
+    err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&pipe_readwrite_errors_kernel_code, kernelName[0], "-cl-std=CL2.0");
     test_error_ret(err, " Error creating program", -1);
     //Create consumer kernel
diff --git a/test_conformance/pipes/test_pipe_subgroups.cpp b/test_conformance/pipes/test_pipe_subgroups.cpp
index b3e1718..b41170c 100644
--- a/test_conformance/pipes/test_pipe_subgroups.cpp
+++ b/test_conformance/pipes/test_pipe_subgroups.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -146,9 +146,7 @@
     test_error_ret(err, " clCreatePipe failed", -1);
     // Create producer kernel
-    err = create_single_kernel_helper(
-        context, &program, &kernel[0], 1,
-        (const char **)&pipe_subgroups_kernel_code, kernelName[0]);
+    err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&pipe_subgroups_kernel_code, kernelName[0], "-cl-std=CL2.0");
     test_error_ret(err, " Error creating program", -1);
     //Create consumer kernel
diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp
index 2b804e4..a65a3dc 100644
--- a/test_conformance/printf/test_printf.cpp
+++ b/test_conformance/printf/test_printf.cpp
@@ -20,7 +20,7 @@
 #include <memory>
 #if ! defined( _WIN32)
-#if defined(__APPLE__)
+#if ! defined( __ANDROID__ )
 #include <sys/sysctl.h>
 #include <unistd.h>
@@ -59,7 +59,7 @@
 //Stream helper functions
 //Associate stdout stream with the file(gFileName):i.e redirect stdout stream to the specific files (gFileName)
-static int acquireOutputStream(int* error);
+static int acquireOutputStream();
 //Close the file(gFileName) associated with the stdout stream and disassociates it.
 static void releaseOutputStream(int fd);
@@ -141,15 +141,10 @@
 // acquireOutputStream
-static int acquireOutputStream(int* error)
+static int acquireOutputStream()
     int fd = streamDup(fileno(stdout));
-    *error = 0;
-    if (!freopen(gFileName, "w", stdout))
-    {
-        releaseOutputStream(fd);
-        *error = -1;
-    }
+    freopen(gFileName,"w",stdout);
     return fd;
@@ -306,22 +301,15 @@
     if(allTestCase[testId]->_type == TYPE_VECTOR)
-        err = create_single_kernel_helper(
-            context, &program, kernel_ptr,
-            sizeof(sourceVec) / sizeof(sourceVec[0]), sourceVec, testname);
+        err = create_single_kernel_helper(context, &program, NULL, sizeof(sourceVec) / sizeof(sourceVec[0]), sourceVec, NULL);
     else if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE)
-        err = create_single_kernel_helper(context, &program, kernel_ptr,
-                                          sizeof(sourceAddrSpace)
-                                              / sizeof(sourceAddrSpace[0]),
-                                          sourceAddrSpace, testname);
+        err = create_single_kernel_helper(context, &program, NULL, sizeof(sourceAddrSpace) / sizeof(sourceAddrSpace[0]), sourceAddrSpace, NULL);
-        err = create_single_kernel_helper(
-            context, &program, kernel_ptr,
-            sizeof(sourceGen) / sizeof(sourceGen[0]), sourceGen, testname);
+        err = create_single_kernel_helper(context, &program, NULL, sizeof(sourceGen) / sizeof(sourceGen[0]), sourceGen, NULL);
     if (!program || err) {
@@ -329,6 +317,12 @@
         return NULL;
+    *kernel_ptr = clCreateKernel(program, testname, &err);
+    if ( err ) {
+        log_error("clCreateKernel failed (%d)\n", err);
+        return NULL;
+    }
     return program;
@@ -498,12 +492,7 @@
-    fd = acquireOutputStream(&err);
-    if (err != 0)
-    {
-        log_error("Error while redirection stdout to file");
-        goto exit;
-    }
+    fd = acquireOutputStream();
     globalWorkSize[0] = 1;
     cl_event ndrEvt;
     err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL,&ndrEvt);
@@ -993,12 +982,7 @@
     uint32_t compute_devices = 0;
     int err;
-    gFd = acquireOutputStream(&err);
-    if (err != 0)
-    {
-        log_error("Error while redirection stdout to file");
-        return TEST_FAIL;
-    }
+    gFd = acquireOutputStream();
     size_t config_size = sizeof( device_frequency );
@@ -1024,24 +1008,20 @@
     auto expected_min_version = Version(1, 2);
     if (version < expected_min_version)
-        version_expected_info("Test", "OpenCL",
-                              expected_min_version.to_string().c_str(),
-                              version.to_string().c_str());
+        version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
         return TEST_SKIP;
     log_info( "Test binary built %s %s\n", __DATE__, __TIME__ );
-    gFd = acquireOutputStream(&err);
-    if (err != 0)
-    {
-        log_error("Error while redirection stdout to file");
-        return TEST_FAIL;
-    }
-    cl_context_properties printf_properties[] = {
-        CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printfCallBack,
-    };
+    gFd = acquireOutputStream();
+    cl_context_properties printf_properties[] =
+        {
+            CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printfCallBack,
+            0
+        };
     cl_context_properties* props = NULL;
diff --git a/test_conformance/profiling/copy.cpp b/test_conformance/profiling/copy.cpp
index 46d1560..97e729e 100644
--- a/test_conformance/profiling/copy.cpp
+++ b/test_conformance/profiling/copy.cpp
@@ -107,14 +107,12 @@
     int_input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     int_output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, &err);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, &err );
     if( !streams[0] ){
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, &err);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err );
     if( !streams[1] ){
         log_error("clCreateBuffer failed\n");
         return -1;
@@ -252,15 +250,13 @@
     inptr = (cl_int *)malloc(sizeof(cl_int) * num_elements);
     outptr = (cl_int *)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, &err);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, &err );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, &err);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, &err );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -440,6 +436,9 @@
     void                            *dst = NULL;
     cl_kernel                    kernel[1];
     size_t                        threads[2];
+    size_t                        localThreads[2];
     int                                err = 0;
     cl_mem_flags            flags;
     unsigned int            num_channels = 4;
@@ -457,6 +456,16 @@
     threads[0] = (size_t)w;
     threads[1] = (size_t)h;
+    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
+    test_error( err, "Unable to get thread group max size" );
+    localThreads[1] = localThreads[0];
+    if( localThreads[0] > threads[0] )
+        localThreads[0] = threads[0];
+    if( localThreads[1] > threads[1] )
+        localThreads[1] = threads[1];
     inptr = (void *)generate_image( (int)num_bytes, d );
     if( ! inptr ){
         log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h );
@@ -471,7 +480,7 @@
     // allocate the input image
-    flags = CL_MEM_READ_WRITE;
+    flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
     memobjs[0] = create_image_2d(context, flags, &image_format_desc, w, h, 0, NULL, &err);
     if( memobjs[0] == (cl_mem)0 ) {
         free( dst );
@@ -480,8 +489,7 @@
         return -1;
-    memobjs[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE, num_bytes, NULL, &err);
+    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), num_bytes, NULL, &err );
     if( memobjs[1] == (cl_mem)0 ) {
         free( dst );
@@ -531,8 +539,11 @@
         return -1;
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL );
     err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
     if (err != CL_SUCCESS){
         print_error( err, "clEnqueueNDRangeKernel failed" );
         clReleaseKernel( kernel[0] );
@@ -789,7 +800,7 @@
     // allocate the input image
-    flags = CL_MEM_READ_WRITE;
+    flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
     memobjs[0] = create_image_2d( context, flags, &image_format_desc, w, h, 0, NULL, &err );
     if( memobjs[0] == (cl_mem)0 ){
         free( dst );
@@ -798,9 +809,7 @@
         return -1;
-    memobjs[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       channel_nbytes * num_channels * w * h, NULL, &err);
+    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), channel_nbytes * num_channels*w*h, NULL, &err );
     if( memobjs[1] == (cl_mem)0 ) {
         clReleaseMemObject( memobjs[0] );
         free( dst );
diff --git a/test_conformance/profiling/execute.cpp b/test_conformance/profiling/execute.cpp
index edfc043..31a5db8 100644
--- a/test_conformance/profiling/execute.cpp
+++ b/test_conformance/profiling/execute.cpp
@@ -175,6 +175,9 @@
     cl_event            executeEvent;
     cl_ulong    queueStart, submitStart, writeStart, writeEnd;
     size_t                threads[2];
+    size_t                localThreads[2];
     float                filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
     int                    filter_w = 3, filter_h = 3;
     int                    err = 0;
@@ -183,10 +186,19 @@
     threads[0] = w;
     threads[1] = h;
+    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
+    test_error( err, "Unable to get thread group max size" );
+    localThreads[1] = localThreads[0];
+    if( localThreads[0] > threads[0] )
+        localThreads[0] = threads[0];
+    if( localThreads[1] > threads[1] )
+        localThreads[1] = threads[1];
     // allocate the input and output image memory objects
-    memobjs[0] =
-        create_image_2d(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                        &image_format_desc, w, h, 0, inptr, &err);
+    memobjs[0] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR),
+                                 &image_format_desc, w, h, 0, inptr, &err );
     if( memobjs[0] == (cl_mem)0 ){
         log_error( " unable to create 2D image using create_image_2d\n" );
         return -1;
@@ -200,9 +212,8 @@
     // allocate an array memory object to load the filter weights
-    memobjs[2] = clCreateBuffer(
-        context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-        sizeof(cl_float) * filter_w * filter_h, &filter_weights, &err);
+    memobjs[2] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR ),
+                               sizeof( cl_float ) * filter_w * filter_h, &filter_weights, &err );
     if( memobjs[2] == (cl_mem)0 ){
         log_error( " unable to create array using clCreateBuffer\n" );
         clReleaseMemObject( memobjs[1] );
@@ -238,7 +249,11 @@
         return -1;
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, &executeEvent );
     err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, &executeEvent );
     if( err != CL_SUCCESS ){
         print_error( err, "clEnqueueNDRangeKernel failed\n" );
diff --git a/test_conformance/profiling/execute_multipass.cpp b/test_conformance/profiling/execute_multipass.cpp
index a264232..7051244 100644
--- a/test_conformance/profiling/execute_multipass.cpp
+++ b/test_conformance/profiling/execute_multipass.cpp
@@ -122,18 +122,14 @@
     // allocate the input and output image memory objects
-    memobjs[0] =
-        create_image_3d(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                        &image_format_desc, w, h, d, 0, 0, inptr, &err);
+    memobjs[0] = create_image_3d( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), &image_format_desc, w, h, d, 0, 0, inptr, &err );
     if( memobjs[0] == (cl_mem)0 ){
         log_error( " unable to create 2D image using create_image_2d\n" );
         return -1;
     // allocate an array memory object to load the filter weights
-    memobjs[1] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_float) * w * h * d * nChannels, NULL, &err);
+    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_READ_WRITE ), sizeof( cl_float ) * w*h*d*nChannels, NULL, &err );
     if( memobjs[1] == (cl_mem)0 ){
         log_error( " unable to create array using clCreateBuffer\n" );
         clReleaseMemObject( memobjs[0] );
diff --git a/test_conformance/profiling/main.cpp b/test_conformance/profiling/main.cpp
index 6e59f61..bd36797 100644
--- a/test_conformance/profiling/main.cpp
+++ b/test_conformance/profiling/main.cpp
@@ -126,7 +126,6 @@
 int main( int argc, const char *argv[] )
-    return runTestHarness(argc, argv, test_num, test_list, false,
-                          CL_QUEUE_PROFILING_ENABLE);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, CL_QUEUE_PROFILING_ENABLE );
diff --git a/test_conformance/profiling/readArray.cpp b/test_conformance/profiling/readArray.cpp
index 85ab9a2..04bccbf 100644
--- a/test_conformance/profiling/readArray.cpp
+++ b/test_conformance/profiling/readArray.cpp
@@ -622,12 +622,25 @@
     cl_event        readEvent;
     cl_ulong    queueStart, submitStart, readStart, readEnd;
     size_t            threads[1];
+    size_t            localThreads[1];
     int                err, err_count = 0;
     int                i;
     size_t            ptrSizes[5];
     threads[0] = (size_t)num_elements;
+    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
+    if( err != CL_SUCCESS ){
+        log_error( "Unable to get thread group max size: %d", err );
+        return -1;
+    }
+    if( localThreads[0] > threads[0] )
+        localThreads[0] = threads[0];
     ptrSizes[0] = size;
     ptrSizes[1] = ptrSizes[0] << 1;
     ptrSizes[2] = ptrSizes[1] << 1;
@@ -639,8 +652,7 @@
             log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) );
             return -1;
-        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                    ptrSizes[i] * num_elements, NULL, &err);
+        streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSizes[i] * num_elements, NULL, &err );
         if( !streams[i] ){
             log_error( " clCreateBuffer failed\n" );
             free( outptr[i] );
@@ -664,8 +676,11 @@
             return -1;
+        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
         if( err != CL_SUCCESS ){
             print_error( err, "clEnqueueNDRangeKernel failed" );
             clReleaseKernel( kernel[i] );
@@ -878,3 +893,105 @@
     return test_stream_read( device, context, queue, num_elements, sizeof( TestStruct ), "struct", 1,
                              stream_read_struct_kernel_code, struct_kernel_name, foo );
+int read_struct_array(cl_device_group device, cl_device id, cl_context context, int num_elements)
+    cl_mem            streams[1];
+    TestStruct        *output_ptr;
+    cl_program        program[1];
+    cl_kernel        kernel[1];
+    void            *values[1];
+    size_t            sizes[1] = { sizeof(cl_stream) };
+    size_t            threads[1];
+    size_t            localThreads[1];
+    int                err;
+    size_t            objSize = sizeof(TestStruct);
+    threads[0] = (size_t)num_elements;
+    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
+    if( err != CL_SUCCESS ){
+        log_error( "Unable to get thread group max size: %d", err );
+        return -1;
+    }
+    if( localThreads[0] > threads[0] )
+        localThreads[0] = threads[0];
+    output_ptr = malloc(objSize * num_elements);
+    if( ! output_ptr ){
+        log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) );
+        return -1;
+    }
+    streams[0] = clCreateBuffer( device, (cl_mem_flags)(CL_MEM_READ_WRITE),  objSize * num_elements, NULL );
+    if( !streams[0] ){
+        log_error( " clCreateBuffer failed\n" );
+        free( output_ptr );
+        return -1;
+    }
+    err = create_program_and_kernel( device, stream_read_struct_kernel_code, "test_stream_read_struct", &program[0], &kernel[0]);
+    if( err ){
+        clReleaseProgram( program[0] );
+        free( output_ptr );
+        return -1;
+    }
+    err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&streams[0] );
+    if( err != CL_SUCCESS){
+        print_error( err, "clSetKernelArg failed" );
+        clReleaseProgram( program[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseMemObject( streams[0] );
+        free( output_ptr );
+        return -1;
+    }
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, threads, localThreads, 0, NULL, NULL );
+    err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL );
+    if( err != CL_SUCCESS ){
+        print_error( err, "clEnqueueNDRangeKernel failed" );
+        clReleaseProgram( program[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseMemObject( streams[0] );
+        free( output_ptr );
+        return -1;
+    }
+    err = clEnqueueReadBuffer( queue, streams[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL );
+    if( err != CL_SUCCESS){
+        print_error( err, "clEnqueueReadBuffer failed" );
+        clReleaseProgram( program[0] );
+        clReleaseKernel( kernel[0] );
+        clReleaseMemObject( streams[0] );
+        free( output_ptr );
+        return -1;
+    }
+    if (verify_read_struct(output_ptr, num_elements)){
+        log_error(" struct test failed\n");
+        err = -1;
+    }
+    else{
+        log_info(" struct test passed\n");
+        err = 0;
+    }
+    // cleanup
+    clReleaseProgram( program[0] );
+    clReleaseKernel( kernel[0] );
+    clReleaseMemObject( streams[0] );
+    free( output_ptr );
+    return err;
diff --git a/test_conformance/profiling/readImage.cpp b/test_conformance/profiling/readImage.cpp
index 9ba6b47..c1a0894 100644
--- a/test_conformance/profiling/readImage.cpp
+++ b/test_conformance/profiling/readImage.cpp
@@ -130,6 +130,9 @@
     cl_event        readEvent;
     cl_ulong        queueStart, submitStart, readStart, readEnd;
     size_t          threads[2];
+    size_t          localThreads[2];
     int                err;
     int                w = 64, h = 64;
     cl_mem_flags    flags;
@@ -147,6 +150,16 @@
     threads[0] = (size_t)w;
     threads[1] = (size_t)h;
+    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( unsigned int ), NULL );
+    test_error( err, "Unable to get thread group max size" );
+    localThreads[1] = localThreads[0];
+    if( localThreads[0] > threads[0] )
+        localThreads[0] = threads[0];
+    if( localThreads[1] > threads[1] )
+        localThreads[1] = threads[1];
     d = init_genrand( gRandomSeed );
     if( image_format_desc.image_channel_data_type == CL_SIGNED_INT8 )
         inptr = (void *)generateSignedImage( w * h * 4, d );
@@ -167,7 +180,7 @@
     // allocate the input and output image memory objects
-    flags = CL_MEM_READ_WRITE;
+    flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
     memobjs[0] = create_image_2d( context, flags, &image_format_desc, w, h, 0, NULL, &err );
     if( memobjs[0] == (cl_mem)0 ){
         free( dst );
@@ -176,8 +189,7 @@
         return -1;
-    memobjs[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                channel_nbytes * 4 * w * h, NULL, &err);
+    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  channel_nbytes * 4 * w * h, NULL, &err );
     if( memobjs[1] == (cl_mem)0 ){
         free( dst );
         free( (void *)inptr );
@@ -219,8 +231,11 @@
         return -1;
+    err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL );
     err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
     if( err != CL_SUCCESS ){
         print_error( err, "clEnqueueNDRangeKernel failed" );
         clReleaseKernel( kernel[0] );
diff --git a/test_conformance/profiling/writeArray.cpp b/test_conformance/profiling/writeArray.cpp
index acfe8f2..550294a 100644
--- a/test_conformance/profiling/writeArray.cpp
+++ b/test_conformance/profiling/writeArray.cpp
@@ -25,6 +25,8 @@
 #include "harness/errorHelpers.h"
 #include "harness/conversions.h"
+//#define USE_LOCAL_THREADS    1
 #ifndef uchar
 typedef unsigned char uchar;
@@ -551,8 +553,8 @@
 static int verify_write_half( void *ptr1, void *ptr2, int n )
     int        i;
-    cl_half *inptr = (cl_half *)ptr1;
-    cl_half *outptr = (cl_half *)ptr2;
+    cl_ushort    *inptr = (cl_ushort *)ptr1;
+    cl_ushort    *outptr = (cl_ushort *)ptr2;
     for( i = 0; i < n; i++ ){
         if( outptr[i] != inptr[i] )
@@ -619,11 +621,24 @@
     cl_ulong    queueStart, submitStart, writeStart, writeEnd;
     size_t            ptrSizes[5], outPtrSizes[5];
     size_t            threads[1];
+    size_t            localThreads[1];
     int                err, err_count = 0;
     int                i, ii;
     threads[0] = (size_t)num_elements;
+    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
+    if( err != CL_SUCCESS ){
+        print_error( err, " Unable to get thread group max size" );
+        return -1;
+    }
+    if( localThreads[0] > threads[0] )
+        localThreads[0] = threads[0];
     ptrSizes[0] = size;
     ptrSizes[1] = ptrSizes[0] << 1;
     ptrSizes[2] = ptrSizes[1] << 1;
@@ -639,8 +654,7 @@
     for( i = 0; i < loops; i++ ){
         ii = i << 1;
-        streams[ii] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                     ptrSizes[i] * num_elements, NULL, &err);
+        streams[ii] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSizes[i] * num_elements, NULL, &err );
         if( ! streams[ii] ){
             free( outptr[i] );
             log_error( " clCreateBuffer failed\n" );
@@ -648,15 +662,11 @@
         if( ! strcmp( type, "half" ) ){
             outptr[i] = malloc( outPtrSizes[i] * num_elements * 2 );
-            streams[ii + 1] =
-                clCreateBuffer(context, CL_MEM_READ_WRITE,
-                               outPtrSizes[i] * 2 * num_elements, NULL, &err);
+            streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  outPtrSizes[i] * 2 * num_elements, NULL, &err );
             outptr[i] = malloc( outPtrSizes[i] * num_elements );
-            streams[ii + 1] =
-                clCreateBuffer(context, CL_MEM_READ_WRITE,
-                               outPtrSizes[i] * num_elements, NULL, &err);
+            streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  outPtrSizes[i] * num_elements, NULL, &err );
         if( ! streams[ii+1] ){
@@ -754,8 +764,11 @@
             return -1;
+        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
         if( err != CL_SUCCESS ){
             print_error( err, " clEnqueueNDRangeKernel failed" );
@@ -800,7 +813,7 @@
         if( !err2 )
-            log_info(" %s%d data verified\n", type, 1 << i);
+            log_info( " %s%d data verified\n", type, 1<<i );
         err = err2;
@@ -821,6 +834,150 @@
 }    // end test_stream_write()
+ int test_stream_struct_write( cl_device_group device, cl_device id, cl_context context, int num_elements )
+ {
+ cl_mem            streams[10];
+ void            *outptr[5];
+ TestStruct        *inptr[5];
+ cl_program        program[5];
+ cl_kernel        kernel[5];
+ void            *values[2];
+ size_t            sizes[2] = { sizeof(cl_stream), sizeof(cl_stream) };
+ size_t            ptrSizes[5];
+ size_t            size = sizeof( TestStruct );
+ size_t            threads[1];
+ size_t            localThreads[1];
+ #endif
+ int                err;
+ int                i, ii, j;
+ int                loops = 1;        // no vector for structs
+ threads[0] = (size_t)num_elements;
+ err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
+ if( err != CL_SUCCESS ){
+ log_error( "Unable to get thread group max size: %d", err );
+ return -1;
+ }
+ if( localThreads[0] > threads[0] )
+ localThreads[0] = threads[0];
+ #endif
+ ptrSizes[0] = size;
+ ptrSizes[1] = ptrSizes[0] << 1;
+ ptrSizes[2] = ptrSizes[1] << 1;
+ ptrSizes[3] = ptrSizes[2] << 1;
+ ptrSizes[4] = ptrSizes[3] << 1;
+ loops = ( loops < 5 ? loops : 5 );
+ for( i = 0; i < loops; i++ ){
+ inptr[i] = (TestStruct *)malloc(ptrSizes[i] * num_elements);
+ for( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ){
+ inptr[i][j].a = (int)random_float( -2147483648.f, 2147483647.0f );
+ inptr[i][j].b = random_float( -FLT_MAX, FLT_MAX );
+ }
+ ii = i << 1;
+ streams[ii] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSizes[i] * num_elements, NULL);
+ if( ! streams[ii] ){
+ free( outptr[i] );
+ log_error( " clCreateBuffer failed\n" );
+ return -1;
+ }
+ outptr[i] = malloc( ptrSizes[i] * num_elements );
+ streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSizes[i] * num_elements, NULL);
+ if( ! streams[ii+1] ){
+ clReleaseMemObject(streams[ii]);
+ free( outptr[i] );
+ log_error( " clCreateBuffer failed\n" );
+ return -1;
+ }
+ err = clWriteArray(context, streams[ii], false, 0, ptrSizes[i]*num_elements, inptr[i], NULL);
+ if( err != CL_SUCCESS ){
+ clReleaseMemObject(streams[ii]);
+ clReleaseMemObject(streams[ii+1]);
+ free( outptr[i] );
+ print_error( err, " clWriteArray failed" );
+ return -1;
+ }
+ err = create_program_and_kernel( device, struct_kernel_code, "read_write_struct", &program[i], &kernel[i] );
+ if( err ){
+ clReleaseMemObject(streams[ii]);
+ clReleaseMemObject(streams[ii+1]);
+ free( outptr[i] );
+ log_error( " Error creating program for struct\n" );
+ return -1;
+ }
+ err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[ii] );
+ err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&streams[ii+1] );
+ if (err != CL_SUCCESS){
+ clReleaseProgram( program[i] );
+ clReleaseKernel( kernel[i] );
+ clReleaseMemObject( streams[ii] );
+ clReleaseMemObject( streams[ii+1] );
+ free( outptr[i] );
+ print_error( err, " clSetKernelArg failed" );
+ return -1;
+ }
+ err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
+ #else
+ err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+ #endif
+ if( err != CL_SUCCESS ){
+ print_error( err, " clEnqueueNDRangeKernel failed" );
+ clReleaseMemObject( streams[ii] );
+ clReleaseMemObject( streams[ii+1] );
+ clReleaseKernel( kernel[i] );
+ clReleaseProgram( program[i] );
+ free( outptr[i] );
+ return -1;
+ }
+ err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
+ if( err != CL_SUCCESS ){
+ clReleaseMemObject( streams[ii] );
+ clReleaseMemObject( streams[ii+1] );
+ clReleaseKernel( kernel[i] );
+ clReleaseProgram( program[i] );
+ free( outptr[i] );
+ print_error( err, " clEnqueueReadBuffer failed" );
+ return -1;
+ }
+ if( verify_write_struct( inptr[i], outptr[i], ptrSizes[i] * num_elements / ptrSizes[0] ) ){
+ log_error( " STREAM_WRITE struct%d test failed\n", 1<<i );
+ err = -1;
+ }
+ else{
+ log_info( " STREAM_WRITE struct%d test passed\n", 1<<i );
+ err = 0;
+ }
+ // cleanup
+ clReleaseMemObject( streams[ii] );
+ clReleaseMemObject( streams[ii+1] );
+ clReleaseKernel( kernel[i] );
+ clReleaseProgram( program[i] );
+ free( outptr[i] );
+ free( (void *)inptr[i] );
+ }
+ return err;
+ }    // end test_stream_struct_write()
+ */
 int test_write_array_int( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
     int    *inptr[5];
diff --git a/test_conformance/profiling/writeImage.cpp b/test_conformance/profiling/writeImage.cpp
index fbc8fbc..5085896 100644
--- a/test_conformance/profiling/writeImage.cpp
+++ b/test_conformance/profiling/writeImage.cpp
@@ -413,6 +413,9 @@
     cl_event        writeEvent;
     cl_ulong    queueStart, submitStart, writeStart, writeEnd;
     size_t    threads[2];
+    size_t    localThreads[2];
     int                err;
     int                w = 64, h = 64;
     cl_mem_flags    flags;
@@ -433,6 +436,16 @@
     threads[0] = (size_t)w;
     threads[1] = (size_t)h;
+    err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( unsigned int ), NULL );
+    test_error( err, "Unable to get thread group max size" );
+    localThreads[1] = localThreads[0];
+    if( localThreads[0] > threads[0] )
+        localThreads[0] = threads[0];
+    if( localThreads[1] > threads[1] )
+        localThreads[1] = threads[1];
     d = init_genrand( gRandomSeed );
     if( image_format_desc.image_channel_data_type == CL_SIGNED_INT8 )
         inptr = (void *)generateSignedImage( w * h * 4, d );
@@ -452,7 +465,7 @@
     // allocate the input and output image memory objects
-    flags = CL_MEM_READ_WRITE;
+    flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
     memobjs[0] = create_image_2d( context, flags, &image_format_desc, w, h, 0, NULL, &err );
     if( memobjs[0] == (cl_mem)0 ){
         free( dst );
@@ -461,8 +474,7 @@
         return -1;
-    memobjs[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                channel_nbytes * 4 * w * h, NULL, &err);
+    memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  channel_nbytes * 4 * w * h, NULL, &err );
     if( memobjs[1] == (cl_mem)0 ){
         free( dst );
         free( (void *)inptr );
@@ -569,8 +581,11 @@
         return -1;
+    err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL );
     err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
     if( err != CL_SUCCESS ){
         print_error( err, "clEnqueueNDRangeKernel failed" );
diff --git a/test_conformance/relationals/main.cpp b/test_conformance/relationals/main.cpp
index 61bde2d..ec495c8 100644
--- a/test_conformance/relationals/main.cpp
+++ b/test_conformance/relationals/main.cpp
@@ -70,6 +70,6 @@
 int main(int argc, const char *argv[])
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
diff --git a/test_conformance/relationals/test_comparisons_double.cpp b/test_conformance/relationals/test_comparisons_double.cpp
index 3fe1124..9dc737f 100644
--- a/test_conformance/relationals/test_comparisons_double.cpp
+++ b/test_conformance/relationals/test_comparisons_double.cpp
@@ -151,17 +151,13 @@
     generate_equiv_test_data_double( inDataA, vecSize, true, d );
     generate_equiv_test_data_double( inDataB, vecSize, false, d );
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_double) * vecSize * TEST_SIZE,
-                                &inDataA, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_double ) * vecSize * TEST_SIZE, &inDataA, &error);
     if( streams[0] == NULL )
         print_error( error, "Creating input array A failed!\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_double) * vecSize * TEST_SIZE,
-                                &inDataB, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_double ) * vecSize * TEST_SIZE, &inDataB, &error);
     if( streams[1] == NULL )
         print_error( error, "Creating input array A failed!\n");
diff --git a/test_conformance/relationals/test_comparisons_float.cpp b/test_conformance/relationals/test_comparisons_float.cpp
index 989c70c..e8178d8 100644
--- a/test_conformance/relationals/test_comparisons_float.cpp
+++ b/test_conformance/relationals/test_comparisons_float.cpp
@@ -158,17 +158,13 @@
     generate_equiv_test_data_float( inDataA, vecSize, true, d );
     generate_equiv_test_data_float( inDataB, vecSize, false, d );
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_float) * vecSize * TEST_SIZE,
-                                &inDataA, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_float ) * vecSize * TEST_SIZE, &inDataA, &error);
     if( streams[0] == NULL )
         print_error( error, "Creating input array A failed!\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_float) * vecSize * TEST_SIZE,
-                                &inDataB, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_float ) * vecSize * TEST_SIZE, &inDataB, &error);
     if( streams[1] == NULL )
         print_error( error, "Creating input array A failed!\n");
diff --git a/test_conformance/relationals/test_relationals.cpp b/test_conformance/relationals/test_relationals.cpp
index 5a874af..7095240 100644
--- a/test_conformance/relationals/test_relationals.cpp
+++ b/test_conformance/relationals/test_relationals.cpp
@@ -89,19 +89,13 @@
     generate_random_data( vecType, TEST_SIZE * g_vector_aligns[vecSize], d, inDataA );
     memset( clearData, 0, sizeof( clearData ) );
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                get_explicit_type_size(vecType)
-                                    * g_vector_aligns[vecSize] * TEST_SIZE,
-                                &inDataA, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecType ) * g_vector_aligns[vecSize] * TEST_SIZE, &inDataA, &error);
     if( streams[0] == NULL )
         print_error( error, "Creating input array A failed!\n");
         return -1;
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_int) * g_vector_aligns[vecSize] * TEST_SIZE,
-                       clearData, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * g_vector_aligns[vecSize] * TEST_SIZE, clearData, &error );
     if( streams[1] == NULL )
         print_error( error, "Creating output array failed!\n");
@@ -369,28 +363,19 @@
     generate_random_data( vecType, TEST_SIZE * g_vector_aligns[vecSize], d, inDataB );
     generate_random_data( testVecType, TEST_SIZE * g_vector_aligns[vecSize], d, inDataC );
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                get_explicit_type_size(vecType)
-                                    * g_vector_aligns[vecSize] * TEST_SIZE,
-                                &inDataA, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecType ) * g_vector_aligns[vecSize] * TEST_SIZE, &inDataA, &error);
     if( streams[0] == NULL )
         print_error( error, "Creating input array A failed!\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                get_explicit_type_size(vecType)
-                                    * g_vector_aligns[vecSize] * TEST_SIZE,
-                                &inDataB, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecType ) * g_vector_aligns[vecSize] * TEST_SIZE, &inDataB, &error);
     if( streams[1] == NULL )
         print_error( error, "Creating input array A failed!\n");
         return -1;
-    streams[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                get_explicit_type_size(testVecType)
-                                    * g_vector_aligns[vecSize] * TEST_SIZE,
-                                &inDataC, &error);
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( testVecType ) * g_vector_aligns[vecSize] * TEST_SIZE, &inDataC, &error);
     if( streams[2] == NULL )
         print_error( error, "Creating input array A failed!\n");
diff --git a/test_conformance/relationals/test_shuffles.cpp b/test_conformance/relationals/test_shuffles.cpp
index 5fd3b6c..c784b65 100644
--- a/test_conformance/relationals/test_shuffles.cpp
+++ b/test_conformance/relationals/test_shuffles.cpp
@@ -621,22 +621,16 @@
     if( shuffleMode == kBuiltInDualInputFnMode )
         generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inSecondData );
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       typeSize * inVecSize * numOrders, inData, &error);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * inVecSize * numOrders, inData, &error);
     test_error( error, "Unable to create input stream" );
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       typeSize * outRealVecSize * numOrders, outData, &error);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * outRealVecSize * numOrders, outData, &error);
     test_error( error, "Unable to create output stream" );
     int argIndex = 0;
     if( shuffleMode == kBuiltInDualInputFnMode )
-        streams[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                    typeSize * inVecSize * numOrders,
-                                    inSecondData, &error);
+        streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * inVecSize * numOrders, inSecondData, &error);
         test_error( error, "Unable to create second input stream" );
         error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 2 ] ), &streams[ 2 ] );
diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp
index 35f154a..518d030 100644
--- a/test_conformance/select/test_select.cpp
+++ b/test_conformance/select/test_select.cpp
@@ -20,7 +20,7 @@
 #include <time.h>
 #include <string.h>
 #if ! defined( _WIN32)
-#if defined(__APPLE__)
+#if ! defined( __ANDROID__ )
 #include <sys/sysctl.h>
@@ -278,11 +278,8 @@
     // create program
     cl_program program;
-    const char **psrc = vec_len == 3 ? sourceV3 : source;
-    size_t src_size = vec_len == 3 ? ARRAY_SIZE(sourceV3) : ARRAY_SIZE(source);
-    if (create_single_kernel_helper(context, &program, kernel_ptr, src_size,
-                                    psrc, testname))
+    if (create_single_kernel_helper(context, &program, kernel_ptr, (cl_uint)(vec_len == 3 ? sizeof(sourceV3) / sizeof(sourceV3[0]) : sizeof(source) / sizeof(source[0])), vec_len == 3 ? sourceV3 : source, testname))
         log_error("Failed to build program (%d)\n", err);
         return NULL;
@@ -343,7 +340,7 @@
         programs[vecsize] = makeSelectProgram(&kernels[vecsize], context, stype, cmptype, element_count[vecsize] );
         if (!programs[vecsize] || !kernels[vecsize]) {
-            ++s_test_cnt;
+            ++s_test_cnt;

             return -1;
@@ -645,7 +642,7 @@
         log_info("*** Wimpy Reduction Factor: %-27u ***\n\n", s_wimpy_reduction_factor);
-    int err = runTestHarness(argCount, argList, test_num, test_list, false, 0);
+    int err = runTestHarness( argCount, argList, test_num, test_list, false, false, 0 );
     free( argList );
diff --git a/test_conformance/select/util_select.cpp b/test_conformance/select/util_select.cpp
index f9641e9..71c58bc 100644
--- a/test_conformance/select/util_select.cpp
+++ b/test_conformance/select/util_select.cpp
@@ -561,18 +561,13 @@
     const cl_uchar *c = (const cl_uchar *) correct;
     size_t i;
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
-                log_error("\n(check_uchar) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%2.2x vs 0x%2.2x\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+    for(i = 0; i < count; i++)
+        if (t[i] != c[i]) {
+            log_error("\n(check_uchar) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%2.2x vs 0x%2.2x\n", vector_size, i, count, c[i], t[i]);
+            return i + 1;
+        }
     return 0;
@@ -581,18 +576,13 @@
     const cl_char *c = (const cl_char *) correct;
     size_t i;
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
-                log_error("\n(check_char) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%2.2x vs 0x%2.2x\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+    for( i = 0; i < count; i++ )
+        if( t[i] != c[i] ) {
+            log_error("\n(check_char) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%2.2x vs 0x%2.2x\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
     return 0;
@@ -602,18 +592,13 @@
     const cl_ushort *c = (const cl_ushort *) correct;
     size_t i;
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
-                log_error("\n(check_ushort) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%4.4x vs 0x%4.4x\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+    for( i = 0; i < count; i++ )
+        if(t[i] != c[i]) {
+            log_error("\n(check_ushort) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%4.4x vs 0x%4.4x\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
     return 0;
@@ -623,18 +608,13 @@
     const cl_short *c = (const cl_short *) correct;
     size_t i;
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
-                log_error("\n(check_short) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%8.8x vs 0x%8.8x\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+    for (i = 0; i < count; i++)
+        if(t[i] != c[i]) {
+            log_error("\n(check_short) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
     return 0;
@@ -644,18 +624,14 @@
     const cl_uint *c = (const cl_uint *) correct;
     size_t i;
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
-                log_error("\n(check_uint) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%8.8x vs 0x%8.8x\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+    for (i = 0; i < count; i++)
+        if(t[i] != c[i]) {
+            log_error("\n(check_uint) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
     return 0;
@@ -665,19 +641,24 @@
     const cl_int *c = (const cl_int *) correct;
     size_t i;
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
-                log_error("\n(check_int) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%8.8x vs 0x%8.8x\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
+    for(i = 0; i < count; i++)
+        if( t[i] != c[i] ) {
+            log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i]);
+            log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%8.8x vs 0x%8.8x\n", vector_size, i+1, count,c[i+1], t[i+1]);
+            log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%8.8x vs 0x%8.8x\n", vector_size, i+2, count,c[i+2], t[i+2]);
+            log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%8.8x vs 0x%8.8x\n", vector_size, i+3, count,c[i+3], t[i+3]);
+            if(i) {
+                log_error("\n(check_int) Error for vector size %ld found just after 0x%8.8lx:  "
+                          "*0x%8.8x vs 0x%8.8x\n", vector_size, i-1, c[i-1], t[i-1]);
-    }
+            return i + 1;
+        }
     return 0;
@@ -687,18 +668,13 @@
     const cl_ulong *c = (const cl_ulong *) correct;
     size_t i;
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
-                log_error("\n(check_ulong) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%16.16llx vs 0x%16.16llx\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+    for( i = 0; i < count; i++ )
+        if( t[i] != c[i] ) {
+            log_error("\n(check_ulong) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%16.16llx vs 0x%16.16llx\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
     return 0;
@@ -708,18 +684,13 @@
     const cl_long *c = (const cl_long *) correct;
     size_t i;
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++)
-            if (t[i] != c[i])
-            {
-                log_error("\n(check_long) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%16.16llx vs 0x%16.16llx\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+    for(i = 0; i < count; i++ )
+        if(t[i] != c[i]) {
+            log_error("\n(check_long) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%16.16llx vs 0x%16.16llx\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
     return 0;
@@ -729,19 +700,14 @@
     const cl_uint *c = (const cl_uint *) correct;
     size_t i;
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++) /* Allow nans to be binary different */
-            if ((t[i] != c[i])
-                && !(isnan(((float *)correct)[i]) && isnan(((float *)test)[i])))
-            {
-                log_error("\n(check_float) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%8.8x vs 0x%8.8x\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+    for( i = 0; i < count; i++ )
+        /* Allow nans to be binary different */
+        if ((t[i] != c[i]) && !(isnan(((float *)correct)[i]) && isnan(((float *)test)[i]))) {
+            log_error("\n(check_float) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
     return 0;
@@ -751,20 +717,15 @@
     const cl_ulong *c = (const cl_ulong *) correct;
     size_t i;
-    if (memcmp(t, c, count * sizeof(c[0])) != 0)
-    {
-        for (i = 0; i < count; i++) /* Allow nans to be binary different */
-            if ((t[i] != c[i])
-                && !(isnan(((double *)correct)[i])
-                     && isnan(((double *)test)[i])))
-            {
-                log_error("\n(check_double) Error for vector size %ld found at "
-                          "0x%8.8lx (of 0x%8.8lx):  "
-                          "*0x%16.16llx vs 0x%16.16llx\n",
-                          vector_size, i, count, c[i], t[i]);
-                return i + 1;
-            }
-    }
+    for( i = 0; i < count; i++ )
+        /* Allow nans to be binary different */
+        if ((t[i] != c[i]) && !(isnan(((double *)correct)[i]) && isnan(((double *)test)[i]))) {
+            log_error("\n(check_double) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx):  "
+                      "*0x%16.16llx vs 0x%16.16llx\n", vector_size, i, count, c[i], t[i] );
+            return i + 1;
+        }
     return 0;
diff --git a/test_conformance/spir/CMakeLists.txt b/test_conformance/spir/CMakeLists.txt
index f65c031..70effa1 100644
--- a/test_conformance/spir/CMakeLists.txt
+++ b/test_conformance/spir/CMakeLists.txt
@@ -9,7 +9,7 @@
-    ../math_brute_force/function_list.cpp
+    ../math_brute_force/FunctionList.cpp
diff --git a/test_conformance/spir/kernelargs.h b/test_conformance/spir/kernelargs.h
index 7c5673e..2aa86d7 100644
--- a/test_conformance/spir/kernelargs.h
+++ b/test_conformance/spir/kernelargs.h
@@ -348,9 +348,9 @@
         if( NULL != buffer )
             int error = CL_SUCCESS;
-            m_memObj = clCreateBuffer(context,
-                                      CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                                      size, buffer, &error);
+            m_memObj = clCreateBuffer( context,
+                                       (cl_mem_flags)( CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR ),
+                                       size, buffer, &error );
             if( error != CL_SUCCESS )
                 throw Exceptions::TestError("clCreateBuffer failed\n", error);
diff --git a/test_conformance/spir/main.cpp b/test_conformance/spir/main.cpp
index 3a18988..5634d5b 100644
--- a/test_conformance/spir/main.cpp
+++ b/test_conformance/spir/main.cpp
@@ -141,8 +141,7 @@
     return false;
-static void get_spir_version(cl_device_id device,
-                             std::vector<Version> &versions)
+static void get_spir_version(cl_device_id device, std::vector<float>& versions)
     char version[64] = {0};
     cl_int err;
@@ -163,11 +162,11 @@
-    for (auto &v : versionVector)
+    for(std::list<std::string>::const_iterator it = versionVector.begin(),
+                                               e  = versionVector.end(); it != e;
+                                               it++)
-        auto major = v[v.find('.') - 1];
-        auto minor = v[v.find('.') + 1];
-        versions.push_back(Version{ major - '0', minor - '0' });
+        versions.push_back(atof(it->c_str()));
@@ -6930,12 +6929,10 @@
         cl_device_id device = get_platform_device(device_type, choosen_device_index, choosen_platform_index);
-        std::vector<Version> versions;
+        std::vector<float> versions;
         get_spir_version(device, versions);
-        if (!is_extension_available(device, "cl_khr_spir")
-            || (std::find(versions.begin(), versions.end(), Version{ 1, 2 })
-                == versions.end()))
+        if (!is_extension_available( device, "cl_khr_spir") ||
+            std::find(versions.begin(), versions.end(), 1.2f) == versions.end())
             log_info("Spir extension version 1.2 is not supported by the device\n");
             return 0;
diff --git a/test_conformance/spir/run_build_test.cpp b/test_conformance/spir/run_build_test.cpp
index 9264d3a..cec2d27 100644
--- a/test_conformance/spir/run_build_test.cpp
+++ b/test_conformance/spir/run_build_test.cpp
@@ -33,12 +33,12 @@
 #include "harness/clImageHelper.h"
 #include "harness/os_helpers.h"
-#include "../math_brute_force/function_list.h"
-#include "datagen.h"
 #include "exceptions.h"
 #include "kernelargs.h"
-#include "run_build_test.h"
+#include "datagen.h"
 #include "run_services.h"
+#include "run_build_test.h"
+#include "../math_brute_force/FunctionList.h"
 #include <CL/cl.h>
 // Task
diff --git a/test_conformance/spir/run_services.cpp b/test_conformance/spir/run_services.cpp
index 06fc418..257dbff 100644
--- a/test_conformance/spir/run_services.cpp
+++ b/test_conformance/spir/run_services.cpp
@@ -302,11 +302,18 @@
 std::string& DataRow::operator[](int column)
-    assert((column > -1 && (size_t)column <= m_row.size())
-           && "Index out of bound");
-    if ((size_t)column == m_row.size()) m_row.push_back("");
+    assert((column > -1) && "Index out of bound");
-    return m_row[column];
+    if ((size_t)column < m_row.size())
+        return m_row[column];
+    if (column == m_row.size())
+    {
+        m_row.push_back("");
+        return m_row[column];
+    }
+    assert(0 && "Index out of bound.");
diff --git a/test_conformance/spirv_new/CMakeLists.txt b/test_conformance/spirv_new/CMakeLists.txt
index 7500571..614d5a7 100644
--- a/test_conformance/spirv_new/CMakeLists.txt
+++ b/test_conformance/spirv_new/CMakeLists.txt
@@ -18,8 +18,8 @@
-  ../../test_conformance/math_brute_force/utility.cpp
+  ../../test_conformance/math_brute_force/Utility.cpp
+  )
diff --git a/test_conformance/spirv_new/ b/test_conformance/spirv_new/
deleted file mode 100755
index 99b16ad..0000000
--- a/test_conformance/spirv_new/
+++ /dev/null
@@ -1,164 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) 2020 The Khronos Group Inc. All Rights Reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Assembles the SPIR-V assembly files used by spirv_new into binaries,
-   and validates them using spirv-val.  Either run this from the parent
-   of the spirv_asm directory, or pass the --source-dir and --output-dir
-   options to specify the locations of the assembly files and the
-   binaries to be generated.
-import argparse
-import glob
-import os
-import subprocess
-import sys
-from textwrap import wrap
-def fatal(message):
-    """Print an error message and exit with a non-zero status, to
-       indicate failure.
-    """
-    print(message)
-    sys.exit(1)
-def assemble_spirv(asm_dir, bin_dir, spirv_as, verbose):
-    """Assemble SPIR-V source into binaries."""
-    if not os.path.exists(bin_dir):
-        os.makedirs(bin_dir)
-    assembly_failures = False
-    for asm_file_path in glob.glob(os.path.join(asm_dir, '*.spvasm*')):
-        asm_file = os.path.basename(asm_file_path)
-        if os.path.isfile(asm_file_path):
-            if verbose:
-                print(' Assembling {}'.format(asm_file))
-            asm_file_root, asm_file_ext = os.path.splitext(asm_file)
-            bin_file = asm_file_root + asm_file_ext.replace('asm', '')
-            bin_file_path = os.path.join(bin_dir, bin_file)
-            command = '"{}" --target-env spv1.0 "{}" -o "{}"'.format(
-                spirv_as, asm_file_path, bin_file_path)
-            if, shell=True) != 0:
-                assembly_failures = True
-                print('ERROR: Failure assembling {}: '
-                      'see above output.'.format(
-                          asm_file))
-                print()
-    if assembly_failures:
-        fatal('\n'.join(wrap(
-            'ERROR: Assembly failure(s) occurred.  See above for error '
-            'messages from the assembler, if any.')))
-def validate_spirv(bin_dir, spirv_val, verbose):
-    """Validates SPIR-V binaries.  Ignores known failures."""
-    validation_failures = False
-    for bin_file_path in glob.glob(os.path.join(bin_dir, '*.spv*')):
-        bin_file = os.path.basename(bin_file_path)
-        if os.path.isfile(bin_file_path):
-            if verbose:
-                print(' Validating {}'.format(bin_file))
-            command = '"{}" "{}"'.format(
-                spirv_val, bin_file_path)
-            if, shell=True) != 0:
-                print('ERROR: Failure validating {}: '
-                      'see above output.'.format(
-                          bin_file))
-                validation_failures = True
-                print()
-    if validation_failures:
-        fatal('ERROR: Validation failure(s) found.  '
-              'See above for validation output.')
-    else:
-        print('All SPIR-V binaries validated successfully.')
-def parse_args():
-    """Parse the command-line arguments."""
-    argparse_kwargs = (
-        {'allow_abbrev': False} if sys.version_info >= (3, 5) else {})
-    argparse_kwargs['description'] = (
-        '''Assembles the SPIR-V assembly files used by spirv_new into
-           binaries, and validates them using spirv-val.  Either run this
-           from the parent of the spirv_asm directory, or pass the
-           --source-dir and --output-dir options to specify the locations
-           the assembly files and the binaries to be generated.
-        ''')
-    parser = argparse.ArgumentParser(**argparse_kwargs)
-    parser.add_argument('-s', '--source-dir', metavar='DIR',
-                        default='spirv_asm',
-                        help='''specifies the directory containing SPIR-V
-                                assembly files''')
-    parser.add_argument('-o', '--output-dir', metavar='DIR',
-                        default='spirv_bin',
-                        help='''specifies the directory in which to
-                                output SPIR-V binary files''')
-    parser.add_argument('-a', '--assembler', metavar='PROGRAM',
-                        default='spirv-as',
-                        help='''specifies the program to use for assembly
-                                of SPIR-V, defaults to spirv-as''')
-    parser.add_argument('-l', '--validator', metavar='PROGRAM',
-                        default='spirv-val',
-                        help='''specifies the program to use for validation
-                                of SPIR-V, defaults to spirv-val''')
-    parser.add_argument('-k', '--skip-validation', action='store_true',
-                        default=False,
-                        help='skips validation of the genareted SPIR-V')
-    parser.add_argument('-v', '--verbose', action='store_true', default=False,
-                        help='''enable verbose output (i.e. prints the
-                                name of each SPIR-V assembly file or
-                                binary as it is assembled or validated)
-                             ''')
-    return parser.parse_args()
-def main():
-    """Main function.  Assembles and validates SPIR-V."""
-    args = parse_args()
-    print('Assembling SPIR-V source into binaries...')
-    assemble_spirv(args.source_dir, args.output_dir, args.assembler,
-                   args.verbose)
-    print('Finished assembling SPIR-V binaries.')
-    print()
-    if args.skip_validation:
-        print('Skipping validation of SPIR-V binaries as requested.')
-    else:
-        print('Validating SPIR-V binaries...')
-        validate_spirv(args.output_dir, args.validator, args.verbose)
-    print()
-    print('Done.')
-if __name__ == '__main__':
-    main()
diff --git a/test_conformance/spirv_new/main.cpp b/test_conformance/spirv_new/main.cpp
index 5a8664b..42a1251 100644
--- a/test_conformance/spirv_new/main.cpp
+++ b/test_conformance/spirv_new/main.cpp
@@ -74,15 +74,14 @@
     return testDefinitions.size();
-void spirvTestsRegistry::addTestClass(baseTestClass *test, const char *testName,
-                                      Version version)
+void spirvTestsRegistry::addTestClass(baseTestClass *test, const char *testName)
     test_definition testDef;
     testDef.func = test->getFunction(); = testName;
-    testDef.min_version = version;
+    testDef.min_version = Version(2, 1);
@@ -136,63 +135,27 @@
     return err;
-int get_program_with_il(clProgramWrapper &prog, const cl_device_id deviceID,
-                        const cl_context context, const char *prog_name,
-                        spec_const spec_const_def)
+int get_program_with_il(clProgramWrapper &prog,
+                        const cl_device_id deviceID,
+                        const cl_context context,
+                        const char *prog_name)
     cl_int err = 0;
-    if (gCompilationMode == kBinary)
-    {
+    if (gCompilationMode == kBinary) {
         return offline_get_program_with_il(prog, deviceID, context, prog_name);
     std::vector<unsigned char> buffer_vec = readSPIRV(prog_name);
     int file_bytes = buffer_vec.size();
-    if (file_bytes == 0)
-    {
+    if (file_bytes == 0) {
         log_error("File %s not found\n", prog_name);
         return -1;
     unsigned char *buffer = &buffer_vec[0];
-    if (gCoreILProgram)
-    {
-        prog = clCreateProgramWithIL(context, buffer, file_bytes, &err);
-            err, "Failed to create program with clCreateProgramWithIL");
-        if (spec_const_def.spec_value != NULL)
-        {
-            err = clSetProgramSpecializationConstant(
-                prog, spec_const_def.spec_id, spec_const_def.spec_size,
-                spec_const_def.spec_value);
-            SPIRV_CHECK_ERROR(
-                err, "Failed to run clSetProgramSpecializationConstant");
-        }
-    }
-    else
-    {
-        cl_platform_id platform;
-        err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM,
-                              sizeof(cl_platform_id), &platform, NULL);
-        SPIRV_CHECK_ERROR(err,
-                          "Failed to get platform info with clGetDeviceInfo");
-        clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
-        clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
-            clGetExtensionFunctionAddressForPlatform(
-                platform, "clCreateProgramWithILKHR");
-        if (clCreateProgramWithILKHR == NULL)
-        {
-            log_error(
-                "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
-            return -1;
-        }
-        prog = clCreateProgramWithILKHR(context, buffer, file_bytes, &err);
-            err, "Failed to create program with clCreateProgramWithILKHR");
-    }
+    prog = clCreateProgramWithIL(context, buffer, file_bytes, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create program with clCreateProgramWithIL");
     err = clBuildProgram(prog, 1, &deviceID, NULL, NULL, NULL);
     SPIRV_CHECK_ERROR(err, "Failed to build program");
@@ -200,27 +163,17 @@
     return err;
-test_status InitCL(cl_device_id id)
+test_status checkAddressWidth(cl_device_id id)
-    test_status spirv_status;
-    bool force = true;
-    spirv_status = check_spirv_compilation_readiness(id);
-    if (spirv_status != TEST_PASS)
-    {
-        return spirv_status;
-    }
+  cl_uint address_bits;
+  cl_uint err = clGetDeviceInfo(id, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint), &address_bits, NULL);
+  if(err != CL_SUCCESS){
+    log_error("clGetDeviceInfo failed to get address bits!");
+    return TEST_FAIL;
+  }
-    cl_uint address_bits;
-    cl_uint err = clGetDeviceInfo(id, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint),
-                                  &address_bits, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clGetDeviceInfo failed to get address bits!");
-        return TEST_FAIL;
-    }
-    gAddrWidth = address_bits == 32 ? "32" : "64";
-    return TEST_PASS;
+  gAddrWidth = address_bits == 32 ? "32" : "64";
+  return TEST_PASS;
 void printUsage() {
@@ -257,8 +210,8 @@
-    return runTestHarnessWithCheck(
-        argc, argv, spirvTestsRegistry::getInstance().getNumTests(),
-        spirvTestsRegistry::getInstance().getTestDefinitions(), false, 0,
-        InitCL);
+    return runTestHarnessWithCheck(argc, argv,
+                          spirvTestsRegistry::getInstance().getNumTests(),
+                          spirvTestsRegistry::getInstance().getTestDefinitions(),
+                          false, 0, checkAddressWidth);
diff --git a/test_conformance/spirv_new/procs.h b/test_conformance/spirv_new/procs.h
index 31c65a3..aa44cdd 100644
--- a/test_conformance/spirv_new/procs.h
+++ b/test_conformance/spirv_new/procs.h
@@ -1,16 +1,14 @@
 Copyright (c) 2016 The Khronos Group Inc. All Rights Reserved.
-This code is protected by copyright laws and contains material proprietary to
-the Khronos Group, Inc. This is UNPUBLISHED PROPRIETARY SOURCE CODE that may not
-be disclosed in whole or in part to third parties, and may not be reproduced,
-republished, distributed, transmitted, displayed, broadcast or otherwise
-exploited in any manner without the express prior written permission of Khronos
-Group. The receipt or possession of this code does not convey any rights to
-reproduce, disclose, or distribute its contents, or to manufacture, use, or sell
-anything that it may describe, in whole or in part other than under the terms of
-the Khronos Adopters Agreement or Khronos Conformance Test Source License
-Agreement as executed between Khronos and the recipient.
+This code is protected by copyright laws and contains material proprietary to the Khronos Group, Inc.
+This is UNPUBLISHED PROPRIETARY SOURCE CODE that may not be disclosed in whole or in part to
+third parties, and may not be reproduced, republished, distributed, transmitted, displayed,
+broadcast or otherwise exploited in any manner without the express prior written permission
+of Khronos Group. The receipt or possession of this code does not convey any rights to reproduce,
+disclose, or distribute its contents, or to manufacture, use, or sell anything that it may describe,
+in whole or in part other than under the terms of the Khronos Adopters Agreement
+or Khronos Conformance Test Source License Agreement as executed between Khronos and the recipient.
 #pragma once
@@ -26,17 +24,16 @@
 #include <vector>
-#define SPIRV_CHECK_ERROR(err, fmt, ...)                                       \
-    do                                                                         \
-    {                                                                          \
-        if (err == CL_SUCCESS) break;                                          \
-        log_error("%s(%d): Error %d\n" fmt "\n", __FILE__, __LINE__, err,      \
-                  ##__VA_ARGS__);                                              \
-        return -1;                                                             \
-    } while (0)
+#define SPIRV_CHECK_ERROR(err, fmt, ...) do {               \
+        if (err == CL_SUCCESS) break;                       \
+        log_error("%s(%d): Error %d\n" fmt "\n",            \
+                  __FILE__, __LINE__, err, ##__VA_ARGS__);  \
+        return -1;                                          \
+    } while(0)
-class baseTestClass {
+class baseTestClass
     baseTestClass() {}
     virtual basefn getFunction() = 0;
@@ -48,53 +45,54 @@
     std::vector<test_definition> testDefinitions;
-    static spirvTestsRegistry &getInstance();
+    static spirvTestsRegistry& getInstance();
     test_definition *getTestDefinitions();
     size_t getNumTests();
-    void addTestClass(baseTestClass *test, const char *testName,
-                      Version version);
+    void addTestClass(baseTestClass *test, const char *testName);
     spirvTestsRegistry() {}
-template <typename T> T *createAndRegister(const char *name, Version version)
+template<typename T>
+T* createAndRegister(const char *name)
     T *testClass = new T();
-    spirvTestsRegistry::getInstance().addTestClass((baseTestClass *)testClass,
-                                                   name, version);
+    spirvTestsRegistry::getInstance().addTestClass((baseTestClass *)testClass, name);
     return testClass;
-#define TEST_SPIRV_FUNC_VERSION(name, version)                                 \
-    extern int test_##name(cl_device_id deviceID, cl_context context,          \
-                           cl_command_queue queue, int num_elements);          \
-    class test_##name##_class : public baseTestClass {                         \
-    private:                                                                   \
-        basefn fn;                                                             \
-                                                                               \
-    public:                                                                    \
-        test_##name##_class(): fn(test_##name) {}                              \
-        basefn getFunction() { return fn; }                                    \
-    };                                                                         \
-    test_##name##_class *var_##name =                                          \
-        createAndRegister<test_##name##_class>(#name, version);                \
-    int test_##name(cl_device_id deviceID, cl_context context,                 \
-                    cl_command_queue queue, int num_elements)
+#define TEST_SPIRV_FUNC(name)                           \
+    extern int test_##name(cl_device_id deviceID,       \
+                           cl_context context,          \
+                           cl_command_queue queue,      \
+                           int num_elements);           \
+    class test_##name##_class  : public baseTestClass   \
+    {                                                   \
+    private:                                            \
+        basefn fn;                                      \
+                                                        \
+    public:                                             \
+    test_##name##_class() : fn(test_##name)             \
+        {                                               \
+        }                                               \
+        basefn getFunction()                            \
+        {                                               \
+            return fn;                                  \
+        }                                               \
+    };                                                  \
+    test_##name##_class *var_##name =                   \
+        createAndRegister<test_##name##_class>(#name);  \
+    int test_##name(cl_device_id deviceID,              \
+                    cl_context context,                 \
+                    cl_command_queue queue,             \
+                    int num_elements)
-#define TEST_SPIRV_FUNC(name) TEST_SPIRV_FUNC_VERSION(name, Version(1, 2))
+std::vector<unsigned char> readSPIRV(const char *file_name);
-struct spec_const
-    spec_const(cl_int id = 0, size_t sizet = 0, const void *value = NULL)
-        : spec_id(id), spec_size(sizet), spec_value(value){};
-    cl_int spec_id;
-    size_t spec_size;
-    const void *spec_value;
-int get_program_with_il(clProgramWrapper &prog, const cl_device_id deviceID,
-                        const cl_context context, const char *prog_name,
-                        spec_const spec_const_def = spec_const());
-std::vector<unsigned char> readSPIRV(const char *file_name);
\ No newline at end of file
+int get_program_with_il(clProgramWrapper &prog,
+                        const cl_device_id deviceID,
+                        const cl_context context,
+                        const char *prog_name);
diff --git a/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm32 b/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm32
index 4442255..2336201 100644
--- a/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "atomic_dec_global" %__spirv_GlobalInvocationId
+               OpEntryPoint Kernel %1 "atomic_dec_global"
                OpName %__spirv_GlobalInvocationId "__spirv_GlobalInvocationId"
                OpName %val "val"
                OpName %counter "counter"
@@ -17,13 +17,13 @@
                OpDecorate %__spirv_GlobalInvocationId LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
      %uint_1 = OpConstant %uint 1
    %uint_512 = OpConstant %uint 512
-%__spirv_GlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_GlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
         %val = OpFunctionParameter %_ptr_CrossWorkgroup_uint
     %counter = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm64 b/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm64
index 836a7af..683a0a5 100644
--- a/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/atomic_dec_global.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "atomic_dec_global" %__spirv_GlobalInvocationId
+               OpEntryPoint Kernel %1 "atomic_dec_global"
                OpName %__spirv_GlobalInvocationId "__spirv_GlobalInvocationId"
                OpName %val "val"
                OpName %counter "counter"
@@ -18,7 +18,7 @@
                OpDecorate %__spirv_GlobalInvocationId LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -26,7 +26,7 @@
      %uint_1 = OpConstant %uint 1
    %uint_512 = OpConstant %uint 512
    %ulong_32 = OpConstant %ulong 32
-%__spirv_GlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_GlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
         %val = OpFunctionParameter %_ptr_CrossWorkgroup_uint
     %counter = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm32 b/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm32
index 2aa163e..22ee479 100644
--- a/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "atomic_inc_global" %__spirv_GlobalInvocationId
+               OpEntryPoint Kernel %1 "atomic_inc_global"
                OpName %__spirv_GlobalInvocationId "__spirv_GlobalInvocationId"
                OpName %val "val"
                OpName %counter "counter"
@@ -17,13 +17,13 @@
                OpDecorate %__spirv_GlobalInvocationId LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
      %uint_1 = OpConstant %uint 1
    %uint_512 = OpConstant %uint 512
-%__spirv_GlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_GlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
         %val = OpFunctionParameter %_ptr_CrossWorkgroup_uint
     %counter = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm64 b/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm64
index 2a96efd..cc59bb9 100644
--- a/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/atomic_inc_global.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "atomic_inc_global" %__spirv_GlobalInvocationId
+               OpEntryPoint Kernel %1 "atomic_inc_global"
                OpName %__spirv_GlobalInvocationId "__spirv_GlobalInvocationId"
                OpName %val "val"
                OpName %counter "counter"
@@ -18,7 +18,7 @@
                OpDecorate %__spirv_GlobalInvocationId LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -26,7 +26,7 @@
      %uint_1 = OpConstant %uint 1
    %uint_512 = OpConstant %uint 512
    %ulong_32 = OpConstant %ulong 32
-%__spirv_GlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_GlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
         %val = OpFunctionParameter %_ptr_CrossWorkgroup_uint
     %counter = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm32 b/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm32
index 5b5991a..57419c1 100644
--- a/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "branch_conditional" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "branch_conditional"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm64 b/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm64
index 41983a1..44e50fc 100644
--- a/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/branch_conditional.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "branch_conditional" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "branch_conditional"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -29,7 +29,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm32 b/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm32
index daeba30..ce37a4d 100644
--- a/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "branch_conditional_weighted" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "branch_conditional_weighted"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm64 b/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm64
index 5238726..d30c281 100644
--- a/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/branch_conditional_weighted.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "branch_conditional_weighted" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "branch_conditional_weighted"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -29,7 +29,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm32
index 7978899..07b6cd2 100644
--- a/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "branch_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "branch_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %10 = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm64
index bf44d36..1b24c5d 100644
--- a/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/branch_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "branch_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "branch_simple"
                OpName %in "in"
                OpName %out "out"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %12 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm32 b/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm32
index 709d07c..2b6d7ad 100644
--- a/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "composite_construct_int4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "composite_construct_int4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
@@ -24,7 +24,7 @@
    %uint_122 = OpConstant %uint 122
    %uint_121 = OpConstant %uint 121
    %uint_119 = OpConstant %uint 119
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %15 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm64 b/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm64
index ce89547..09f6d9b 100644
--- a/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/composite_construct_int4.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "composite_construct_int4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "composite_construct_int4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v4uint = OpTypeVector %uint 4
@@ -27,7 +27,7 @@
    %uint_122 = OpConstant %uint 122
    %uint_121 = OpConstant %uint 121
    %uint_119 = OpConstant %uint 119
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %17 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm32 b/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm32
index 8ff67a2..e277912 100644
--- a/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "composite_construct_struct" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "composite_construct_struct"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v2uint = OpTypeVector %uint 2
       %uchar = OpTypeInt 8 0
@@ -29,7 +29,7 @@
 %uint_2100480000 = OpConstant %uint 2100480000
 %uint_2100480000_0 = OpConstant %uint 2100480000
          %18 = OpConstantComposite %v2uint %uint_2100480000 %uint_2100480000_0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_11
          %19 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm64 b/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm64
index 6d98de6..ed5422d 100644
--- a/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/composite_construct_struct.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "composite_construct_struct" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "composite_construct_struct"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v2uint = OpTypeVector %uint 2
@@ -32,7 +32,7 @@
 %uint_2100480000 = OpConstant %uint 2100480000
 %uint_2100480000_0 = OpConstant %uint 2100480000
          %20 = OpConstantComposite %v2uint %uint_2100480000 %uint_2100480000_0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_12
          %21 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm32
index 40a6b56..7931958 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %uchar_20 = OpConstant %uchar 20
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm64
index 3deb9c4..a356232 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_char_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %ulong_32 = OpConstant %ulong 32
    %uchar_20 = OpConstant %uchar 20
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm32
index 6d00bf5..3d2651f 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_double_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_double_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
 %double_3_1415926535897931 = OpConstant %double 3.1415926535897931
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm64
index ed87e28..807916d 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_double_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_double_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_double_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
 %double_3_1415926535897931 = OpConstant %double 3.1415926535897931
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm32
index 0f7d44c..19c2b51 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_false_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_false_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %bool = OpTypeBool
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -23,7 +23,7 @@
       %false = OpConstantFalse %bool
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %14 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm64
index 9a4923c..1c1980e 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_false_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_false_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_false_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
        %bool = OpTypeBool
@@ -26,7 +26,7 @@
       %false = OpConstantFalse %bool
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %16 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm32
index 85cbca7..3286c75 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
 %float_3_14159274 = OpConstant %float 3.14159274
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm64
index 5b7672f..f91b2bf 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_float_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
 %float_3_14159274 = OpConstant %float 3.14159274
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm32
index 47a0ac9..59e4e2c 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_half_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_half_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
 %half_0x1_ap_1 = OpConstant %half 0x1.ap+1
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm64
index 7a9f1e8..4004708 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_half_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_half_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_half_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
        %half = OpTypeFloat 16
@@ -25,7 +25,7 @@
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
 %half_0x1_ap_1 = OpConstant %half 0x1.ap+1
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %14 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm32
index 46934c0..b80809f 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_int3_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_int3_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_v3uint = OpTypePointer CrossWorkgroup %v3uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_v3uint
@@ -23,7 +23,7 @@
    %uint_122 = OpConstant %uint 122
    %uint_121 = OpConstant %uint 121
          %13 = OpConstantComposite %v3uint %uint_123 %uint_122 %uint_121
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v3uint
          %14 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm64
index 25516d2..2c30216 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_int3_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_int3_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_int3_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
@@ -27,7 +27,7 @@
    %uint_122 = OpConstant %uint 122
    %uint_121 = OpConstant %uint 121
          %16 = OpConstantComposite %v3uint %uint_123 %uint_122 %uint_121
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v3uint
          %17 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm32
index 62ba1d8..7e6c2bb 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_int4_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_int4_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
@@ -25,7 +25,7 @@
    %uint_121 = OpConstant %uint 121
    %uint_119 = OpConstant %uint 119
          %15 = OpConstantComposite %v4uint %uint_123 %uint_122 %uint_121 %uint_119
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %16 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm64
index 3824090..7c5b5fb 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_int4_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_int4_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_int4_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v4uint = OpTypeVector %uint 4
@@ -28,7 +28,7 @@
    %uint_121 = OpConstant %uint 121
    %uint_119 = OpConstant %uint 119
          %17 = OpConstantComposite %v4uint %uint_123 %uint_122 %uint_121 %uint_119
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %18 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm32
index a5091de..c5bd8bf 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_int_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_int_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %uint_123 = OpConstant %uint 123
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm64
index 75362b8..1ddb08a 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_int_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_int_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_int_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
    %uint_123 = OpConstant %uint 123
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm32
index adf712a..e0a8b6f 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_long_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_long_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
 %ulong_34359738368 = OpConstant %ulong 34359738368
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm64
index 28e856e..7091209 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_long_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_long_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_long_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
 %ulong_34359738368 = OpConstant %ulong 34359738368
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm32
index 2f28026..3b53e16 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_short_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_short_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
 %ushort_32000 = OpConstant %ushort 32000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm64
index d3fc51e..d95032b 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_short_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_short_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_short_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
 %ushort_32000 = OpConstant %ushort 32000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm32
index ebe84ad..cf617c6 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_struct_int_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_struct_int_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
   %_struct_9 = OpTypeStruct %uint %uchar
@@ -25,7 +25,7 @@
 %uint_2100483600 = OpConstant %uint 2100483600
   %uchar_128 = OpConstant %uchar 128
          %14 = OpConstantComposite %_struct_9 %uint_2100483600 %uchar_128
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_9
          %15 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm64
index bc37627..2554667 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_struct_int_char_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_struct_int_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_struct_int_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
       %uchar = OpTypeInt 8 0
@@ -28,7 +28,7 @@
 %uint_2100483600 = OpConstant %uint 2100483600
   %uchar_128 = OpConstant %uchar 128
          %16 = OpConstantComposite %_struct_10 %uint_2100483600 %uchar_128
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_10
          %17 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm32
index 0c0e0e0..becd9e6 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Kernel
                OpCapability Linkage
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_struct_int_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_struct_int_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
   %_struct_9 = OpTypeStruct %uint %float
@@ -24,7 +24,7 @@
   %uint_1024 = OpConstant %uint 1024
 %float_3_1415 = OpConstant %float 3.1415
          %14 = OpConstantComposite %_struct_9 %uint_1024 %float_3_1415
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_9
          %15 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm64
index f06f277..96d85db 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_struct_int_float_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_struct_int_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_struct_int_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
       %float = OpTypeFloat 32
@@ -27,7 +27,7 @@
   %uint_1024 = OpConstant %uint 1024
 %float_3_1415 = OpConstant %float 3.1415
          %16 = OpConstantComposite %_struct_10 %uint_1024 %float_3_1415
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_10
          %17 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm32
index 62763d8..23ae86e 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Linkage
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_struct_struct_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_struct_struct_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v2uint = OpTypeVector %uint 2
       %uchar = OpTypeInt 8 0
@@ -31,7 +31,7 @@
          %18 = OpConstantComposite %_struct_10 %uint_2100483600 %uchar_128
          %19 = OpConstantComposite %v2uint %uint_2100480000 %uint_2100480000_0
          %20 = OpConstantComposite %_struct_11 %19 %18
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_11
          %21 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm64
index 277b370..3c3cf2e 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_struct_struct_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_struct_struct_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_struct_struct_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v2uint = OpTypeVector %uint 2
@@ -34,7 +34,7 @@
          %20 = OpConstantComposite %_struct_11 %uint_2100483600 %uchar_128
          %21 = OpConstantComposite %v2uint %uint_2100480000 %uint_2100480000_0
          %22 = OpConstantComposite %_struct_12 %21 %20
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_12
          %23 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm32
index b135e13..8b3733d 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_true_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_true_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %bool = OpTypeBool
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -23,7 +23,7 @@
        %true = OpConstantTrue %bool
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %14 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm64
index 3fdd7a2..da72dc6 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_true_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_true_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_true_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
        %bool = OpTypeBool
@@ -26,7 +26,7 @@
        %true = OpConstantTrue %bool
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %16 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm32
index c40a600..a62d571 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_uchar_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_uchar_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %uchar_19 = OpConstant %uchar 19
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm64
index c55fe28..bba2551 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_uchar_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_uchar_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_uchar_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %ulong_32 = OpConstant %ulong 32
    %uchar_19 = OpConstant %uchar 19
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm32
index 3f430dc..5c8de26 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_uint_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_uint_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
  %uint_54321 = OpConstant %uint 54321
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm64
index d64fed7..1271b0f 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_uint_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_uint_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_uint_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
  %uint_54321 = OpConstant %uint 54321
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm32
index 04b5054..b69d82a 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_ulong_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_ulong_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
 %ulong_9223372036854775810 = OpConstant %ulong 9223372036854775810
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm64
index f631976..a53f902 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_ulong_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_ulong_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_ulong_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
 %ulong_9223372036854775810 = OpConstant %ulong 9223372036854775810
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm32
index 0cfda35..ed309bf 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "constant_ushort_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_ushort_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
 %ushort_65000 = OpConstant %ushort 65000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm64
index 6324051..be52174 100644
--- a/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/constant_ushort_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "constant_ushort_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "constant_ushort_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
 %ushort_65000 = OpConstant %ushort 65000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm32
index 5545194..5ad09c7 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %uchar_20 = OpConstant %uchar 20
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm64
index e933f20..6faa144 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_char_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %ulong_32 = OpConstant %ulong 32
    %uchar_20 = OpConstant %uchar 20
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm32
index 4b8eb83..3023c94 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_double_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_double_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
 %double_3_1415926535897931 = OpConstant %double 3.1415926535897931
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm64
index 49837d8..3fa7f96 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_double_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_double_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_double_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
 %double_3_1415926535897931 = OpConstant %double 3.1415926535897931
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm32
index 268d70e..d7ef29c 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
 %float_3_14159274 = OpConstant %float 3.14159274
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm64
index 0fd6111..4594c1c 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_float_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
 %float_3_14159274 = OpConstant %float 3.14159274
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm32
index f924afc..b0af90a 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_half_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_half_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
 %half_0x1_ap_1 = OpConstant %half 0x1.ap+1
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm64
index b4babdf..b9eeca9 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_half_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_half_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_half_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
        %half = OpTypeFloat 16
@@ -25,7 +25,7 @@
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
 %half_0x1_ap_1 = OpConstant %half 0x1.ap+1
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %14 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm32
index d8424d2..2f23b9d 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_int3_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_int3_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_v3uint = OpTypePointer CrossWorkgroup %v3uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_v3uint
@@ -23,7 +23,7 @@
    %uint_122 = OpConstant %uint 122
    %uint_121 = OpConstant %uint 121
          %13 = OpConstantComposite %v3uint %uint_123 %uint_122 %uint_121
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v3uint
          %14 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm64
index d96630b..0ab8b1d 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_int3_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_int3_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_int3_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
@@ -27,7 +27,7 @@
    %uint_122 = OpConstant %uint 122
    %uint_121 = OpConstant %uint 121
          %16 = OpConstantComposite %v3uint %uint_123 %uint_122 %uint_121
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v3uint
          %17 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm32
index 1710d73..f7633a9 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_int4_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_int4_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
@@ -25,7 +25,7 @@
    %uint_121 = OpConstant %uint 121
    %uint_119 = OpConstant %uint 119
          %15 = OpConstantComposite %v4uint %uint_123 %uint_122 %uint_121 %uint_119
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %16 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm64
index 1f82ba6..6241114 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_int4_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_int4_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_int4_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v4uint = OpTypeVector %uint 4
@@ -28,7 +28,7 @@
    %uint_121 = OpConstant %uint 121
    %uint_119 = OpConstant %uint 119
          %17 = OpConstantComposite %v4uint %uint_123 %uint_122 %uint_121 %uint_119
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %18 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm32
index 39011d5..c6fa218 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_int_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_int_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %uint_123 = OpConstant %uint 123
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm64
index 11afdb7..e068f8c 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_int_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_int_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_int_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
    %uint_123 = OpConstant %uint 123
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm32
index f4d521e..52411f2 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_long_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_long_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
 %ulong_34359738368 = OpConstant %ulong 34359738368
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm64
index 36a50ed..4349ed3 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_long_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_long_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_long_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
 %ulong_34359738368 = OpConstant %ulong 34359738368
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm32
index b735f0a..924e675 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_short_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_short_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
 %ushort_32000 = OpConstant %ushort 32000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm64
index 760f0c6..0be76e2 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_short_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_short_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_short_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
 %ushort_32000 = OpConstant %ushort 32000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm32
index f12b288..1d6c526 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Linkage
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_struct_int_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_struct_int_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
   %_struct_9 = OpTypeStruct %uint %uchar
@@ -25,7 +25,7 @@
 %uint_2100483600 = OpConstant %uint 2100483600
   %uchar_128 = OpConstant %uchar 128
          %14 = OpConstantComposite %_struct_9 %uint_2100483600 %uchar_128
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_9
          %15 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm64
index 717244a..e0387e4 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_struct_int_char_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_struct_int_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_struct_int_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
       %uchar = OpTypeInt 8 0
@@ -28,7 +28,7 @@
 %uint_2100483600 = OpConstant %uint 2100483600
   %uchar_128 = OpConstant %uchar 128
          %16 = OpConstantComposite %_struct_10 %uint_2100483600 %uchar_128
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_10
          %17 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm32
index b781cd9..6c0fb9d 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Kernel
                OpCapability Linkage
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_struct_int_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_struct_int_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
   %_struct_9 = OpTypeStruct %uint %float
@@ -24,7 +24,7 @@
   %uint_1024 = OpConstant %uint 1024
 %float_3_1415 = OpConstant %float 3.1415
          %14 = OpConstantComposite %_struct_9 %uint_1024 %float_3_1415
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_9
          %15 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm64
index 6a9a898..13b549a 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_struct_int_float_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_struct_int_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_struct_int_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
       %float = OpTypeFloat 32
@@ -27,7 +27,7 @@
   %uint_1024 = OpConstant %uint 1024
 %float_3_1415 = OpConstant %float 3.1415
          %16 = OpConstantComposite %_struct_10 %uint_1024 %float_3_1415
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_10
          %17 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm32
index 7f8277f..978e921 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Int8
                OpCapability Linkage
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_struct_struct_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_struct_struct_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v2uint = OpTypeVector %uint 2
       %uchar = OpTypeInt 8 0
@@ -31,7 +31,7 @@
          %18 = OpConstantComposite %_struct_10 %uint_2100483600 %uchar_128
          %19 = OpConstantComposite %v2uint %uint_2100480000 %uint_2100480000_0
          %20 = OpConstantComposite %_struct_11 %19 %18
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_11
          %21 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm64
index 9573ada..29f4b46 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_struct_struct_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_struct_struct_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_struct_struct_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v2uint = OpTypeVector %uint 2
@@ -34,7 +34,7 @@
          %20 = OpConstantComposite %_struct_11 %uint_2100483600 %uchar_128
          %21 = OpConstantComposite %v2uint %uint_2100480000 %uint_2100480000_0
          %22 = OpConstantComposite %_struct_12 %21 %20
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_12
          %23 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm32
index ca50b9f..0450ba0 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_uchar_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_uchar_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %uchar_19 = OpConstant %uchar 19
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm64
index 70726a9..87072e3 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_uchar_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_uchar_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_uchar_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %ulong_32 = OpConstant %ulong 32
    %uchar_19 = OpConstant %uchar 19
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm32
index b10ca7d..e36766f 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_uint_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_uint_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
  %uint_54321 = OpConstant %uint 54321
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm64
index ace794d..c0af753 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_uint_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_uint_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_uint_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
  %uint_54321 = OpConstant %uint 54321
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm32
index 91b60fc..678a5f3 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_ulong_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_ulong_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
 %ulong_9223372036854775810 = OpConstant %ulong 9223372036854775810
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm64
index 5731a0a..caf6e55 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_ulong_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_ulong_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_ulong_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
 %ulong_9223372036854775810 = OpConstant %ulong 9223372036854775810
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm32
index 96d4f29..f6fbcdb 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "copy_ushort_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_ushort_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
 %ushort_65000 = OpConstant %ushort 65000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm64
index cfce560..fd6b1c2 100644
--- a/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/copy_ushort_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "copy_ushort_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "copy_ushort_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
 %ushort_65000 = OpConstant %ushort 65000
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm32
index ea571a1..94bf154 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_aliased" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_aliased"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,11 +20,11 @@
                OpGroupDecorate %6 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm64
index 1e68600..e57a5ff 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_aliased.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_aliased" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_aliased"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %6 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm32
index 51ed0e3..b2a2ae2 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_alignment" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_alignment"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,11 +20,11 @@
                OpGroupDecorate %6 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm64
index e7b4bde..f6bb852 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_alignment.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_alignment" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_alignment"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %6 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_coherent.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_coherent.spvasm32
new file mode 100644
index 0000000..eddcdb3
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_coherent.spvasm32
@@ -0,0 +1,80 @@
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 40
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability ImageBasic
+               OpCapability LiteralSampler
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %2 "decorate_coherent"
+               OpName %sampler "sampler"
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %call1 "call1"
+               OpName %call2 "call2"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit6 "vecinit6"
+               OpName %TempSampledImage "TempSampledImage"
+               OpName %call7_old "call7.old"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %src Coherent
+               OpDecorate %19 Constant
+         %19 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %sampler LinkageAttributes "sampler" Export
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpDecorate %sampler Alignment 4
+               OpGroupDecorate %19 %sampler %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+       %uint = OpTypeInt 32 0
+    %uint_16 = OpConstant %uint 16
+%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
+     %v3uint = OpTypeVector %uint 3
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
+       %void = OpTypeVoid
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %28 = OpTypeImage %void 2D 0 0 0 0 Unknown ReadOnly
+         %29 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %28
+     %v2uint = OpTypeVector %uint 2
+         %31 = OpTypeSampler
+         %32 = OpTypeSampledImage %28
+      %float = OpTypeFloat 32
+         %34 = OpConstantSampler %31 None 0 Nearest
+    %float_0 = OpConstant %float 0
+    %sampler = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_16
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+          %2 = OpFunction %void None %29
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+        %src = OpFunctionParameter %28
+      %entry = OpLabel
+         %36 = OpUndef %v2uint
+         %37 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %uint %37 0
+         %38 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %uint %38 1
+         %39 = OpLoad %v3uint %__spirv_BuiltInGlobalSize Aligned 0
+      %call2 = OpCompositeExtract %uint %39 0
+        %mul = OpIMul %uint %call2 %call1
+        %add = OpIAdd %uint %mul %call
+    %vecinit = OpCompositeInsert %v2uint %call1 %36 0
+   %vecinit6 = OpCompositeInsert %v2uint %call %vecinit 1
+%TempSampledImage = OpSampledImage %32 %src %34
+  %call7_old = OpImageSampleExplicitLod %v4uint %TempSampledImage %vecinit6 Lod %float_0
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %dst %add
+               OpStore %arrayidx %call7_old Aligned 16
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_coherent.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_coherent.spvasm64
new file mode 100644
index 0000000..d86a984
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_coherent.spvasm64
@@ -0,0 +1,93 @@
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 47
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int64
+               OpCapability ImageBasic
+               OpCapability LiteralSampler
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %2 "decorate_coherent"
+               OpName %sampler "sampler"
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %conv "conv"
+               OpName %call1 "call1"
+               OpName %conv2 "conv2"
+               OpName %conv3 "conv3"
+               OpName %call4 "call4"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit10 "vecinit10"
+               OpName %TempSampledImage "TempSampledImage"
+               OpName %call11_old "call11.old"
+               OpName %sext "sext"
+               OpName %idxprom "idxprom"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %src Coherent
+               OpDecorate %24 Constant
+         %24 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %sampler LinkageAttributes "sampler" Export
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpDecorate %sampler Alignment 4
+               OpGroupDecorate %24 %sampler %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+       %uint = OpTypeInt 32 0
+      %ulong = OpTypeInt 64 0
+    %uint_16 = OpConstant %uint 16
+   %ulong_32 = OpConstant %ulong 32
+%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
+    %v3ulong = OpTypeVector %ulong 3
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
+       %void = OpTypeVoid
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %35 = OpTypeImage %void 2D 0 0 0 0 Unknown ReadOnly
+         %36 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %35
+     %v2uint = OpTypeVector %uint 2
+         %38 = OpTypeSampler
+         %39 = OpTypeSampledImage %35
+      %float = OpTypeFloat 32
+         %41 = OpConstantSampler %38 None 0 Nearest
+    %float_0 = OpConstant %float 0
+    %sampler = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_16
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+          %2 = OpFunction %void None %36
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+        %src = OpFunctionParameter %35
+      %entry = OpLabel
+         %43 = OpUndef %v2uint
+         %44 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %ulong %44 0
+       %conv = OpUConvert %uint %call
+         %45 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %ulong %45 1
+      %conv2 = OpUConvert %uint %call1
+      %conv3 = OpSConvert %ulong %conv2
+         %46 = OpLoad %v3ulong %__spirv_BuiltInGlobalSize Aligned 0
+      %call4 = OpCompositeExtract %ulong %46 0
+        %mul = OpIMul %ulong %conv3 %call4
+        %add = OpIAdd %ulong %mul %call
+    %vecinit = OpCompositeInsert %v2uint %conv2 %43 0
+  %vecinit10 = OpCompositeInsert %v2uint %conv %vecinit 1
+%TempSampledImage = OpSampledImage %39 %src %41
+ %call11_old = OpImageSampleExplicitLod %v4uint %TempSampledImage %vecinit10 Lod %float_0
+       %sext = OpShiftLeftLogical %ulong %add %ulong_32
+    %idxprom = OpShiftRightArithmetic %ulong %sext %ulong_32
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %dst %idxprom
+               OpStore %arrayidx %call11_old Aligned 16
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm32
index 2bfd5a2..482526d 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_constant" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_constant"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,11 +20,11 @@
                OpGroupDecorate %6 %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm64
index a1408ae..e2c5547 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_constant.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_constant" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_constant"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %6 %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm32
index a5a71f6..bf5c981 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_constant" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_constant"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,11 +20,11 @@
                OpGroupDecorate %6 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm64
index df02172..9d8bc9d 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_constant_fail.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_constant" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_constant"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %6 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm32
index 5aa89a4..4150342 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_cpacked" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_cpacked"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %_struct_4 CPacked
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
   %_struct_4 = OpTypeStruct %uint %uchar
@@ -26,7 +26,7 @@
 %uint_2100483600 = OpConstant %uint 2100483600
   %uchar_127 = OpConstant %uchar 127
          %14 = OpConstantComposite %_struct_4 %uint_2100483600 %uchar_127
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_4
          %15 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm64
index 41dee76..aa42b25 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_cpacked.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_cpacked" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_cpacked"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -18,7 +18,7 @@
                OpDecorate %_struct_4 CPacked
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
       %uchar = OpTypeInt 8 0
@@ -29,7 +29,7 @@
 %uint_2100483600 = OpConstant %uint 2100483600
   %uchar_127 = OpConstant %uchar 127
          %16 = OpConstantComposite %_struct_4 %uint_2100483600 %uchar_127
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_4
          %17 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_nonreadable.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_nonreadable.spvasm32
new file mode 100644
index 0000000..92fdb89
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_nonreadable.spvasm32
@@ -0,0 +1,66 @@
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 31
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability ImageBasic
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %2 "decorate_nonreadable"
+               OpSource OpenCL_C 100000
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %call1 "call1"
+               OpName %call2 "call2"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit3 "vecinit3"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %dst NonReadable
+               OpDecorate %16 Constant
+         %16 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %src FuncParamAttr NoCapture
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpGroupDecorate %16 %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+       %uint = OpTypeInt 32 0
+     %v3uint = OpTypeVector %uint 3
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
+       %void = OpTypeVoid
+         %21 = OpTypeImage %void 2D 0 0 0 0 Unknown WriteOnly
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %24 = OpTypeFunction %void %21 %_ptr_CrossWorkgroup_v4uint
+     %v2uint = OpTypeVector %uint 2
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+          %2 = OpFunction %void None %24
+        %dst = OpFunctionParameter %21
+        %src = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+      %entry = OpLabel
+         %26 = OpUndef %v2uint
+         %27 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %uint %27 0
+         %28 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %uint %28 1
+         %29 = OpLoad %v3uint %__spirv_BuiltInGlobalSize Aligned 0
+      %call2 = OpCompositeExtract %uint %29 0
+        %mul = OpIMul %uint %call2 %call1
+        %add = OpIAdd %uint %mul %call
+    %vecinit = OpCompositeInsert %v2uint %call1 %26 0
+   %vecinit3 = OpCompositeInsert %v2uint %call %vecinit 1
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %src %add
+         %30 = OpLoad %v4uint %arrayidx Aligned 16
+               OpImageWrite %dst %vecinit3 %30
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_nonreadable.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_nonreadable.spvasm64
new file mode 100644
index 0000000..596cc49
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_nonreadable.spvasm64
@@ -0,0 +1,79 @@
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 38
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int64
+               OpCapability ImageBasic
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %2 "decorate_nonreadable"
+               OpSource OpenCL_C 100000
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %conv "conv"
+               OpName %call1 "call1"
+               OpName %conv2 "conv2"
+               OpName %conv3 "conv3"
+               OpName %call4 "call4"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit7 "vecinit7"
+               OpName %sext "sext"
+               OpName %idxprom "idxprom"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %dst NonReadable
+               OpDecorate %21 Constant
+         %21 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %src FuncParamAttr NoCapture
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpGroupDecorate %21 %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+      %ulong = OpTypeInt 64 0
+       %uint = OpTypeInt 32 0
+   %ulong_32 = OpConstant %ulong 32
+    %v3ulong = OpTypeVector %ulong 3
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
+       %void = OpTypeVoid
+         %28 = OpTypeImage %void 2D 0 0 0 0 Unknown WriteOnly
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %31 = OpTypeFunction %void %28 %_ptr_CrossWorkgroup_v4uint
+     %v2uint = OpTypeVector %uint 2
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+          %2 = OpFunction %void None %31
+        %dst = OpFunctionParameter %28
+        %src = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+      %entry = OpLabel
+         %33 = OpUndef %v2uint
+         %34 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %ulong %34 0
+       %conv = OpUConvert %uint %call
+         %35 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %ulong %35 1
+      %conv2 = OpUConvert %uint %call1
+      %conv3 = OpSConvert %ulong %conv2
+         %36 = OpLoad %v3ulong %__spirv_BuiltInGlobalSize Aligned 0
+      %call4 = OpCompositeExtract %ulong %36 0
+        %mul = OpIMul %ulong %conv3 %call4
+        %add = OpIAdd %ulong %mul %call
+    %vecinit = OpCompositeInsert %v2uint %conv2 %33 0
+   %vecinit7 = OpCompositeInsert %v2uint %conv %vecinit 1
+       %sext = OpShiftLeftLogical %ulong %add %ulong_32
+    %idxprom = OpShiftRightArithmetic %ulong %sext %ulong_32
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %src %idxprom
+         %37 = OpLoad %v4uint %arrayidx Aligned 16
+               OpImageWrite %dst %vecinit7 %37
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_nonwritable.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_nonwritable.spvasm32
new file mode 100644
index 0000000..e4b25e3
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_nonwritable.spvasm32
@@ -0,0 +1,80 @@
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 40
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability ImageBasic
+               OpCapability LiteralSampler
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %2 "decorate_nonwritable"
+               OpName %sampler "sampler"
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %call1 "call1"
+               OpName %call2 "call2"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit6 "vecinit6"
+               OpName %TempSampledImage "TempSampledImage"
+               OpName %call7_old "call7.old"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %src NonWritable
+               OpDecorate %19 Constant
+         %19 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %sampler LinkageAttributes "sampler" Export
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpDecorate %sampler Alignment 4
+               OpGroupDecorate %19 %sampler %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+       %uint = OpTypeInt 32 0
+    %uint_16 = OpConstant %uint 16
+%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
+     %v3uint = OpTypeVector %uint 3
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
+       %void = OpTypeVoid
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %28 = OpTypeImage %void 2D 0 0 0 0 Unknown ReadOnly
+         %29 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %28
+     %v2uint = OpTypeVector %uint 2
+         %31 = OpTypeSampler
+         %32 = OpTypeSampledImage %28
+      %float = OpTypeFloat 32
+         %34 = OpConstantSampler %31 None 0 Nearest
+    %float_0 = OpConstant %float 0
+    %sampler = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_16
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+          %2 = OpFunction %void None %29
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+        %src = OpFunctionParameter %28
+      %entry = OpLabel
+         %36 = OpUndef %v2uint
+         %37 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %uint %37 0
+         %38 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %uint %38 1
+         %39 = OpLoad %v3uint %__spirv_BuiltInGlobalSize Aligned 0
+      %call2 = OpCompositeExtract %uint %39 0
+        %mul = OpIMul %uint %call2 %call1
+        %add = OpIAdd %uint %mul %call
+    %vecinit = OpCompositeInsert %v2uint %call1 %36 0
+   %vecinit6 = OpCompositeInsert %v2uint %call %vecinit 1
+%TempSampledImage = OpSampledImage %32 %src %34
+  %call7_old = OpImageSampleExplicitLod %v4uint %TempSampledImage %vecinit6 Lod %float_0
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %dst %add
+               OpStore %arrayidx %call7_old Aligned 16
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_nonwritable.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_nonwritable.spvasm64
new file mode 100644
index 0000000..0f52968
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_nonwritable.spvasm64
@@ -0,0 +1,93 @@
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 47
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int64
+               OpCapability ImageBasic
+               OpCapability LiteralSampler
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %2 "decorate_nonwritable"
+               OpName %sampler "sampler"
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %conv "conv"
+               OpName %call1 "call1"
+               OpName %conv2 "conv2"
+               OpName %conv3 "conv3"
+               OpName %call4 "call4"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit10 "vecinit10"
+               OpName %TempSampledImage "TempSampledImage"
+               OpName %call11_old "call11.old"
+               OpName %sext "sext"
+               OpName %idxprom "idxprom"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %src NonWritable
+               OpDecorate %24 Constant
+         %24 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %sampler LinkageAttributes "sampler" Export
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpDecorate %sampler Alignment 4
+               OpGroupDecorate %24 %sampler %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+       %uint = OpTypeInt 32 0
+      %ulong = OpTypeInt 64 0
+    %uint_16 = OpConstant %uint 16
+   %ulong_32 = OpConstant %ulong 32
+%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
+    %v3ulong = OpTypeVector %ulong 3
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
+       %void = OpTypeVoid
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %35 = OpTypeImage %void 2D 0 0 0 0 Unknown ReadOnly
+         %36 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %35
+     %v2uint = OpTypeVector %uint 2
+         %38 = OpTypeSampler
+         %39 = OpTypeSampledImage %35
+      %float = OpTypeFloat 32
+         %41 = OpConstantSampler %38 None 0 Nearest
+    %float_0 = OpConstant %float 0
+    %sampler = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_16
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+          %2 = OpFunction %void None %36
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+        %src = OpFunctionParameter %35
+      %entry = OpLabel
+         %43 = OpUndef %v2uint
+         %44 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %ulong %44 0
+       %conv = OpUConvert %uint %call
+         %45 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %ulong %45 1
+      %conv2 = OpUConvert %uint %call1
+      %conv3 = OpSConvert %ulong %conv2
+         %46 = OpLoad %v3ulong %__spirv_BuiltInGlobalSize Aligned 0
+      %call4 = OpCompositeExtract %ulong %46 0
+        %mul = OpIMul %ulong %conv3 %call4
+        %add = OpIAdd %ulong %mul %call
+    %vecinit = OpCompositeInsert %v2uint %conv2 %43 0
+  %vecinit10 = OpCompositeInsert %v2uint %conv %vecinit 1
+%TempSampledImage = OpSampledImage %39 %src %41
+ %call11_old = OpImageSampleExplicitLod %v4uint %TempSampledImage %vecinit10 Lod %float_0
+       %sext = OpShiftLeftLogical %ulong %add %ulong_32
+    %idxprom = OpShiftRightArithmetic %ulong %sext %ulong_32
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %dst %idxprom
+               OpStore %arrayidx %call11_old Aligned 16
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm32
index 7c06103..f05e916 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_restrict" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_restrict"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,11 +20,11 @@
                OpGroupDecorate %6 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm64
index 5bdaf73..b9181c9 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_restrict.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_restrict" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_restrict"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %6 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm32
index 32b6fe8..e57fe7b 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Float64
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rte_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rte_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTE
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm64
index b7b3754..7b56ca2 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_double_long.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rte_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rte_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTE
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm32
index 45e840a..9a93f23 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rte_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rte_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -17,13 +17,13 @@
                OpDecorate %6 FPRoundingMode RTE
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm64
index 771975e..45809d6 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rte_float_int.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rte_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rte_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -18,7 +18,7 @@
                OpDecorate %6 FPRoundingMode RTE
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -26,7 +26,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm32
index 6ca2023..30a398e 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Float64
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtn_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtn_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTN
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm64
index 13b3e2b..a164bd5 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_double_long.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtn_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtn_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTN
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm32
index 60e16d0..1db15d3 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtn_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtn_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -17,13 +17,13 @@
                OpDecorate %6 FPRoundingMode RTN
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm64
index 41fb1c7..8b8ee77 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtn_float_int.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtn_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtn_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -18,7 +18,7 @@
                OpDecorate %6 FPRoundingMode RTN
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -26,7 +26,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm32
index a2b097a..e6d80eb 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Float64
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtp_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtp_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTP
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm64
index 4eaa81d..b7f5915 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_double_long.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtp_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtp_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTP
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm32
index cce1cbc..09b4b98 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtp_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtp_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -17,13 +17,13 @@
                OpDecorate %6 FPRoundingMode RTP
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm64
index 6f315db..5a74457 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtp_float_int.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtp_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtp_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -18,7 +18,7 @@
                OpDecorate %6 FPRoundingMode RTP
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -26,7 +26,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm32
index d9dfe72..8eb632d 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Float64
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtz_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtz_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTZ
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm64
index 7738138..7c5c346 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_double_long.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtz_double_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtz_double_long"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -19,14 +19,14 @@
                OpDecorate %6 FPRoundingMode RTZ
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm32
index 288df6d..21b4d4f 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtz_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtz_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -17,13 +17,13 @@
                OpDecorate %6 FPRoundingMode RTZ
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm64
index 728d8bb..6f0b123 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_rounding_rtz_float_int.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_rounding_rtz_float_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_rounding_rtz_float_int"
                OpName %res "res"
                OpName %in "in"
                OpName %entry "entry"
@@ -18,7 +18,7 @@
                OpDecorate %6 FPRoundingMode RTZ
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -26,7 +26,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm32
index 5437067..4aa86c1 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_char" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_char"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpDecorate %7 SaturatedConversion
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm64
index ba4d649..e102b84 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_char.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_char" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_char"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpDecorate %7 SaturatedConversion
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm32
index 3fa47c9..1e2e7bd 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_int"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,13 +19,13 @@
                OpDecorate %7 SaturatedConversion
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm64
index 8609e20..7ea8af9 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_int.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_int"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpDecorate %7 SaturatedConversion
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm32
index dbb3b44..c37a35b 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_short" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_short"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpDecorate %7 SaturatedConversion
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm64
index 2915c12..02bb819 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_short.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_short" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_short"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpDecorate %7 SaturatedConversion
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm32
index 9bffb68..b0b52ed 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uchar" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uchar"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpDecorate %7 SaturatedConversion
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm64
index 354639f..19c1415 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uchar.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uchar" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uchar"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpDecorate %7 SaturatedConversion
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm32
index 0672489..1426f11 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uint" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uint"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,13 +19,13 @@
                OpDecorate %7 SaturatedConversion
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm64
index 7d9efb0..cd597d7 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_uint.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uint" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_uint"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpDecorate %7 SaturatedConversion
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm32
index ffbb417..56555aa 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_ushort" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_ushort"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpDecorate %7 SaturatedConversion
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm64
index 317f992..72bf0e0 100644
--- a/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/decorate_saturated_conversion_ushort.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "decorate_saturated_conversion_ushort" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "decorate_saturated_conversion_ushort"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpDecorate %7 SaturatedConversion
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_volatile.spvasm32 b/test_conformance/spirv_new/spirv_asm/decorate_volatile.spvasm32
new file mode 100644
index 0000000..94b1716
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_volatile.spvasm32
@@ -0,0 +1,80 @@
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 40
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability ImageBasic
+               OpCapability LiteralSampler
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %2 "decorate_volatile"
+               OpName %sampler "sampler"
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %call1 "call1"
+               OpName %call2 "call2"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit6 "vecinit6"
+               OpName %TempSampledImage "TempSampledImage"
+               OpName %call7_old "call7.old"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %src Volatile
+               OpDecorate %19 Constant
+         %19 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %sampler LinkageAttributes "sampler" Export
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpDecorate %sampler Alignment 4
+               OpGroupDecorate %19 %sampler %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+       %uint = OpTypeInt 32 0
+    %uint_16 = OpConstant %uint 16
+%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
+     %v3uint = OpTypeVector %uint 3
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
+       %void = OpTypeVoid
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %28 = OpTypeImage %void 2D 0 0 0 0 Unknown ReadOnly
+         %29 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %28
+     %v2uint = OpTypeVector %uint 2
+         %31 = OpTypeSampler
+         %32 = OpTypeSampledImage %28
+      %float = OpTypeFloat 32
+         %34 = OpConstantSampler %31 None 0 Nearest
+    %float_0 = OpConstant %float 0
+    %sampler = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_16
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
+          %2 = OpFunction %void None %29
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+        %src = OpFunctionParameter %28
+      %entry = OpLabel
+         %36 = OpUndef %v2uint
+         %37 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %uint %37 0
+         %38 = OpLoad %v3uint %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %uint %38 1
+         %39 = OpLoad %v3uint %__spirv_BuiltInGlobalSize Aligned 0
+      %call2 = OpCompositeExtract %uint %39 0
+        %mul = OpIMul %uint %call2 %call1
+        %add = OpIAdd %uint %mul %call
+    %vecinit = OpCompositeInsert %v2uint %call1 %36 0
+   %vecinit6 = OpCompositeInsert %v2uint %call %vecinit 1
+%TempSampledImage = OpSampledImage %32 %src %34
+  %call7_old = OpImageSampleExplicitLod %v4uint %TempSampledImage %vecinit6 Lod %float_0
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %dst %add
+               OpStore %arrayidx %call7_old Aligned 16
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/decorate_volatile.spvasm64 b/test_conformance/spirv_new/spirv_asm/decorate_volatile.spvasm64
new file mode 100644
index 0000000..9a151da
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/decorate_volatile.spvasm64
@@ -0,0 +1,93 @@
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 47
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int64
+               OpCapability ImageBasic
+               OpCapability LiteralSampler
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %2 "decorate_volatile"
+               OpName %sampler "sampler"
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %__spirv_BuiltInGlobalSize "__spirv_BuiltInGlobalSize"
+               OpName %dst "dst"
+               OpName %src "src"
+               OpName %entry "entry"
+               OpName %call "call"
+               OpName %conv "conv"
+               OpName %call1 "call1"
+               OpName %conv2 "conv2"
+               OpName %conv3 "conv3"
+               OpName %call4 "call4"
+               OpName %mul "mul"
+               OpName %add "add"
+               OpName %vecinit "vecinit"
+               OpName %vecinit10 "vecinit10"
+               OpName %TempSampledImage "TempSampledImage"
+               OpName %call11_old "call11.old"
+               OpName %sext "sext"
+               OpName %idxprom "idxprom"
+               OpName %arrayidx "arrayidx"
+               OpDecorate %src Volatile
+               OpDecorate %24 Constant
+         %24 = OpDecorationGroup
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalSize BuiltIn GlobalSize
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %sampler LinkageAttributes "sampler" Export
+               OpDecorate %__spirv_BuiltInGlobalSize LinkageAttributes "__spirv_BuiltInGlobalSize" Import
+               OpDecorate %__spirv_BuiltInGlobalInvocationId LinkageAttributes "__spirv_BuiltInGlobalInvocationId" Import
+               OpDecorate %sampler Alignment 4
+               OpGroupDecorate %24 %sampler %__spirv_BuiltInGlobalInvocationId %__spirv_BuiltInGlobalSize
+       %uint = OpTypeInt 32 0
+      %ulong = OpTypeInt 64 0
+    %uint_16 = OpConstant %uint 16
+   %ulong_32 = OpConstant %ulong 32
+%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
+    %v3ulong = OpTypeVector %ulong 3
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
+       %void = OpTypeVoid
+     %v4uint = OpTypeVector %uint 4
+%_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
+         %35 = OpTypeImage %void 2D 0 0 0 0 Unknown ReadOnly
+         %36 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %35
+     %v2uint = OpTypeVector %uint 2
+         %38 = OpTypeSampler
+         %39 = OpTypeSampledImage %35
+      %float = OpTypeFloat 32
+         %41 = OpConstantSampler %38 None 0 Nearest
+    %float_0 = OpConstant %float 0
+    %sampler = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_16
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+%__spirv_BuiltInGlobalSize = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
+          %2 = OpFunction %void None %36
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
+        %src = OpFunctionParameter %35
+      %entry = OpLabel
+         %43 = OpUndef %v2uint
+         %44 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+       %call = OpCompositeExtract %ulong %44 0
+       %conv = OpUConvert %uint %call
+         %45 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 0
+      %call1 = OpCompositeExtract %ulong %45 1
+      %conv2 = OpUConvert %uint %call1
+      %conv3 = OpSConvert %ulong %conv2
+         %46 = OpLoad %v3ulong %__spirv_BuiltInGlobalSize Aligned 0
+      %call4 = OpCompositeExtract %ulong %46 0
+        %mul = OpIMul %ulong %conv3 %call4
+        %add = OpIAdd %ulong %mul %call
+    %vecinit = OpCompositeInsert %v2uint %conv2 %43 0
+  %vecinit10 = OpCompositeInsert %v2uint %conv %vecinit 1
+%TempSampledImage = OpSampledImage %39 %src %41
+ %call11_old = OpImageSampleExplicitLod %v4uint %TempSampledImage %vecinit10 Lod %float_0
+       %sext = OpShiftLeftLogical %ulong %add %ulong_32
+    %idxprom = OpShiftRightArithmetic %ulong %sext %ulong_32
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4uint %dst %idxprom
+               OpStore %arrayidx %call11_old Aligned 16
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm32
index b76c3a3..e131176 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm32
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 23
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -24,11 +24,11 @@
                OpGroupDecorate %7 %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %14
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm64
index 1fa09e9..26b81ed 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm64
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 28
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -26,11 +26,11 @@
       %ulong = OpTypeInt 64 0
        %uint = OpTypeInt 32 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm32
index f1320bb..be55242 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm32
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 23
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -24,11 +24,11 @@
                OpGroupDecorate %7 %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %14
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm64
index ad04b78..3cddeea 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm64
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 28
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -26,11 +26,11 @@
       %ulong = OpTypeInt 64 0
        %uint = OpTypeInt 32 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm32
index 8b41542..0844d73 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm32
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 23
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -24,11 +24,11 @@
                OpGroupDecorate %7 %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %14
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm64
index 0661442..1a7cd92 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm64
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 28
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -26,11 +26,11 @@
       %ulong = OpTypeInt 64 0
        %uint = OpTypeInt 32 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm32
index 3c6aebd..a9ffa99 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm32
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 23
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -24,11 +24,11 @@
                OpGroupDecorate %7 %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %14
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm64
index d85f61a..73ca88b 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm64
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 28
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -26,11 +26,11 @@
       %ulong = OpTypeInt 64 0
        %uint = OpTypeInt 32 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm32
index baf2707..0642373 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm32
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 22
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -25,11 +25,11 @@
        %uint = OpTypeInt 32 0
      %uint_0 = OpConstant %uint 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm64
index 7be5e17..654b497 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm64
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 26
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -27,11 +27,11 @@
        %uint = OpTypeInt 32 0
      %uint_0 = OpConstant %uint 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm32
index 49e49da..4e80aef 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm32
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 25
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -25,11 +25,11 @@
        %uint = OpTypeInt 32 0
     %uint_31 = OpConstant %uint 31
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm64
index 572b724..82c83af 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm64
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 30
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -27,11 +27,11 @@
        %uint = OpTypeInt 32 0
     %uint_31 = OpConstant %uint 31
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm32
index 764f1c4..bd8f3aa 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm32
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 25
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -25,11 +25,11 @@
        %uint = OpTypeInt 32 0
     %uint_31 = OpConstant %uint 31
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm64
index 325f413..a0e8a75 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm64
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 30
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -27,11 +27,11 @@
        %uint = OpTypeInt 32 0
     %uint_31 = OpConstant %uint 31
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm32
index f2f28a1..9dc688e 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm32
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 23
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -24,11 +24,11 @@
                OpDecorate %8 NoSignedWrap
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %14
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm64
index 159629a..a010d1c 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm64
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 28
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -26,11 +26,11 @@
       %ulong = OpTypeInt 64 0
        %uint = OpTypeInt 32 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm32 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm32
index e2dc884..43ce257 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm32
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 23
 ; Schema: 0
@@ -9,7 +9,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -24,11 +24,11 @@
                OpDecorate %8 NoUnsignedWrap
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3uint Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %14
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm64 b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm64
index 4dfdc80..e68e559 100644
--- a/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm64
@@ -1,5 +1,5 @@
-; Version: 1.0
+; Version: 1.1
 ; Generator: Khronos SPIR-V Tools Assembler; 0
 ; Bound: 28
 ; Schema: 0
@@ -10,7 +10,7 @@
                OpExtension "SPV_KHR_no_integer_wrap_decoration"
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_cl" %__spirv_BuiltInGlobalInvocationId
+               OpEntryPoint Kernel %2 "fmath_cl"
                OpSource OpenCL_C 200000
                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
                OpName %out "out"
@@ -26,11 +26,11 @@
       %ulong = OpTypeInt 64 0
        %uint = OpTypeInt 32 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm32
index ee4be5a..9a4a933 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm64
index 7a7f3e1..9e51b19 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fadd_double.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm32 b/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm32
index b8cab7e..111d6c8 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
diff --git a/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm64 b/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm64
index a4571db..b77917c 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fadd_double2.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,14 +23,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
diff --git a/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm32
index 482f9a7..ef7d045 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,12 +21,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm64
index 21d5977..15b5652 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fadd_float.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm32 b/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm32
index d3c0e6d..6f318ad 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm64 b/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm64
index 76519c3..7c83efa 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fadd_float4.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,14 +22,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm32
index f818cb4..7f9ff11 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
diff --git a/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm64
index 1af4127..7f069cc 100644
--- a/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fadd_half.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm32
index 6e25c8f..eae4280 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm64
index efbd1e2..613178f 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_double.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm32 b/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm32
index 5169102..dff1bd9 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm64 b/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm64
index 9b73c4f..0906974 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_double2.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,14 +23,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm32
index 69efe6f..f12a75f 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,12 +21,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm64
index 06676f9..d9f7e37 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_float.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm32 b/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm32
index 3f1324a..2e494d9 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm64 b/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm64
index aeab8e8..bcfbee7 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_float4.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,14 +22,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm32
index 35c8c87..03c56e5 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
diff --git a/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm64
index 9630ec1..1001b32 100644
--- a/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fdiv_half.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
diff --git a/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm32
index 06d65d2..477f7b3 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm64
index 92121ec..a5c4cd4 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmod_double.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm32
index f50bb99..c87a039 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
diff --git a/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm64
index bdaa98e..8fd6538 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmod_double2.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,14 +23,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
diff --git a/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm32
index d1e7961..89e6708 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,12 +21,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm64
index ce085d5..e9262b1 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmod_float.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm32
index 60235ea..e23bce8 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm64
index 58663d6..aaf4206 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmod_float4.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,14 +22,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm32
index 1884299..68f0557 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
diff --git a/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm64
index 62798c2..9778bc0 100644
--- a/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmod_half.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
diff --git a/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm32
index a9c4779..38db8d8 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm64
index 308f2d9..40b13b0 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmul_double.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm32
index 2101c7f..0d63134 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
diff --git a/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm64
index 05d0113..5cb1dae 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmul_double2.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,14 +23,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
diff --git a/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm32
index 284739f..0b32b87 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,12 +21,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm64
index f082b9f..e10a425 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmul_float.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm32
index ca9a1f3..1884e58 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm64
index f1e9b99..63c5dd7 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmul_float4.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,14 +22,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm32
index 1cdf4a4..efeee4e 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
diff --git a/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm64
index 0061e32..a944ced 100644
--- a/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fmul_half.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
diff --git a/test_conformance/spirv_new/spirv_asm/frem_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/frem_double.spvasm32
index 091fb08..059bcb7 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/frem_double.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/frem_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/frem_double.spvasm64
index 4675c1d..e9372cd 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/frem_double.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm32 b/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm32
index ef8050b..6b6df42 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
diff --git a/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm64 b/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm64
index a92e95b..afcd086 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/frem_double2.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,14 +23,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
diff --git a/test_conformance/spirv_new/spirv_asm/frem_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/frem_float.spvasm32
index 5cfd0cd..8a67ea3 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/frem_float.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,12 +21,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/frem_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/frem_float.spvasm64
index 36b631d..4b08706 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/frem_float.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm32 b/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm32
index 0b0b6e8..d2f06b4 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm64 b/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm64
index 55cc605..efecb17 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/frem_float4.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,14 +22,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_asm/frem_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/frem_half.spvasm32
index e4a8b8a..f6fc04e 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_half.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/frem_half.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
diff --git a/test_conformance/spirv_new/spirv_asm/frem_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/frem_half.spvasm64
index 8cfe36a..0608963 100644
--- a/test_conformance/spirv_new/spirv_asm/frem_half.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/frem_half.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
diff --git a/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm32
index b9d3d9f..a8f161f 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm64
index 8cdbfe3..36beb94 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fsub_double.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm32 b/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm32
index f892275..6a3aa18 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
diff --git a/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm64 b/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm64
index 3ac187d..1c73bc9 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fsub_double2.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,14 +23,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_v2double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
diff --git a/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm32
index 70f93da..95d58eb 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,12 +21,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm64
index 603974d..ff1089f 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fsub_float.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,13 +22,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm32 b/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm32
index a992d6e..039864f 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,13 +21,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm64 b/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm64
index 2f3ad1b..4b7ace2 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fsub_float4.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,14 +22,14 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm32
index 09112d7..c8698a4 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -22,12 +22,12 @@
                OpGroupDecorate %7 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
diff --git a/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm64
index 4fd41e2..7889c84 100644
--- a/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/fsub_half.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Float16
           %1 = OpExtInstImport "OpenCL.std"
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %2 "fmath_spv" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %2 "fmath_spv"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -23,13 +23,13 @@
                OpGroupDecorate %7 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_half
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %2 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_half
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
diff --git a/test_conformance/spirv_new/spirv_asm/label_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/label_simple.spvasm32
index e5b826b..5fd4630 100644
--- a/test_conformance/spirv_new/spirv_asm/label_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/label_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "label_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "label_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %10 = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/label_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/label_simple.spvasm64
index e496c25..2c403fa 100644
--- a/test_conformance/spirv_new/spirv_asm/label_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/label_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "label_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "label_simple"
                OpName %in "in"
                OpName %out "out"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %12 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm32
index df2cb59..644607a 100644
--- a/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "lifetime_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "lifetime_simple"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm64
index d24242f..06e4b81 100644
--- a/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/lifetime_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "lifetime_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "lifetime_simple"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -29,7 +29,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm32 b/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm32
index 79217e7..0171ce3 100644
--- a/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "test_linkage" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "test_linkage"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %4 LinkageAttributes "simple_fnegate_linkage" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
          %12 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %4 = OpFunction %float Const %12
          %13 = OpFunctionParameter %float
diff --git a/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm64 b/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm64
index 707d853..c3212a2 100644
--- a/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/linkage_import.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "test_linkage" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "test_linkage"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %4 LinkageAttributes "simple_fnegate_linkage" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
          %13 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %4 = OpFunction %float Const %13
          %14 = OpFunctionParameter %float
diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm32 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm32
index 790dfa8..d9a7a9b 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_dont_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_dont_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -20,7 +20,7 @@
                OpGroupDecorate %6 %res %in
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm64 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm64
index d7e21e4..286cbd8 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_dont_unroll.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_dont_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_dont_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -21,7 +21,7 @@
                OpGroupDecorate %6 %res %in
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
@@ -31,7 +31,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm32 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm32
index 50ccc63..2e556de 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_none"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -20,7 +20,7 @@
                OpGroupDecorate %6 %res %in
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm64 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm64
index c6e88ed..cef58ed 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_none.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_none"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -21,7 +21,7 @@
                OpGroupDecorate %6 %res %in
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
@@ -31,7 +31,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm32 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm32
index ad94ac0..34e0d4a 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -20,7 +20,7 @@
                OpGroupDecorate %6 %res %in
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm64 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm64
index b6b6f7a..7abaa6f 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_conditional_unroll.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_conditional_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -21,7 +21,7 @@
                OpGroupDecorate %6 %res %in
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
@@ -31,7 +31,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm32 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm32
index e6ef6b6..efa6872 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_dont_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_dont_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -20,7 +20,7 @@
                OpGroupDecorate %6 %res %in
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm64 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm64
index 932ca39..6f58f2c 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_dont_unroll.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_dont_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_dont_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -21,7 +21,7 @@
                OpGroupDecorate %6 %res %in
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
@@ -31,7 +31,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm32 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm32
index e545284..b925d5d 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_none"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -20,7 +20,7 @@
                OpGroupDecorate %6 %res %in
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm64 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm64
index e123666..3164d2b 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_none.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_none"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -21,7 +21,7 @@
                OpGroupDecorate %6 %res %in
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
@@ -31,7 +31,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm32 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm32
index 4358262..f5387ab 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -20,7 +20,7 @@
                OpGroupDecorate %6 %res %in
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm64 b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm64
index 7a9520a..fb6fcb6 100644
--- a/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/loop_merge_branch_unroll.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "loop_merge_branch_unroll" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "loop_merge_branch_unroll"
                OpName %res "res"
                OpName %in "in"
                OpName %rep "rep"
@@ -21,7 +21,7 @@
                OpGroupDecorate %6 %res %in
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
@@ -31,7 +31,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm32
index 9dc1561..b6640b8 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_function_const" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_const"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
          %11 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
          %12 = OpFunction %float Const %11
          %13 = OpFunctionParameter %float
          %14 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm64
index 58a497c..9704356 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_function_const.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_function_const" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_const"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
          %12 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
          %13 = OpFunction %float Const %12
          %14 = OpFunctionParameter %float
          %15 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm32
index f344df3..6067d66 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_function_inline" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_inline"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
          %11 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
          %12 = OpFunction %float Inline %11
          %13 = OpFunctionParameter %float
          %14 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm64
index ee4e9c9..f5b4a7a 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_function_inline.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_function_inline" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_inline"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
          %12 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
          %13 = OpFunction %float Inline %12
          %14 = OpFunctionParameter %float
          %15 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm32
index 8c93a49..2c6b8e0 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_function_noinline" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_noinline"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
          %11 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
          %12 = OpFunction %float DontInline %11
          %13 = OpFunctionParameter %float
          %14 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm64
index cdf1990..67db809 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_function_noinline.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_function_noinline" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_noinline"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
          %12 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
          %13 = OpFunction %float DontInline %12
          %14 = OpFunctionParameter %float
          %15 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm32
index cd4345b..634df87 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_function_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_none"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
          %11 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
          %12 = OpFunction %float None %11
          %13 = OpFunctionParameter %float
          %14 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm64
index f242ae7..451f9b7 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_function_none.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_function_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_none"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
          %12 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
          %13 = OpFunction %float None %12
          %14 = OpFunctionParameter %float
          %15 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm32
index 4922f67..f4e3d67 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_function_pure" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_pure"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
          %11 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
          %12 = OpFunction %float Pure %11
          %13 = OpFunctionParameter %float
          %14 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm64
index 8c792ce..7ee623d 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_function_pure.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_function_pure" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_pure"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
          %12 = OpTypeFunction %float %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
          %13 = OpFunction %float Pure %12
          %14 = OpFunctionParameter %float
          %15 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm32
index b8f958d..deab3d7 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_function_pure_ptr" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_pure_ptr"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,7 +15,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
@@ -23,7 +23,7 @@
     %uint_32 = OpConstant %uint 32
          %12 = OpTypeFunction %float %_ptr_CrossWorkgroup_float %uint
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %uint %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
          %14 = OpFunction %float Pure %12
          %15 = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %16 = OpFunctionParameter %uint
diff --git a/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm64
index 4a5896d..3ebe9e4 100644
--- a/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_function_pure_ptr.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_function_pure_ptr" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_function_pure_ptr"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
@@ -24,7 +24,7 @@
    %ulong_32 = OpConstant %ulong 32
          %12 = OpTypeFunction %float %_ptr_CrossWorkgroup_float %ulong
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %ulong %float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
          %14 = OpFunction %float Pure %12
          %15 = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %16 = OpFunctionParameter %ulong
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm32
index 868565c..8864d09 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_double" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_double"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm64
index 38e71c9..0e012e8 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_double.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_double" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_double"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm32
index e3cd3c9..90e5d6f 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_float" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_float"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm64
index 085d114..d3b6ce2 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_float.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_float" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_float"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm32
index c65b7e9..8752e74 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_float4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_float4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm64
index 16c647b..6909517 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_float4.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_float4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_float4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm32
index 98bf6f7..ed59bd7 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_int"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %10 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm64
index 6dd86da..241871b 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_int.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_int"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm32
index 61042e4..0e111b0 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_int4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_int4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm64
index e741ef7..e1782d6 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_int4.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_int4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_int4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm32
index ab9d29d..c513fd3 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_long"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm64
index 2eeb565..edee512 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_long.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_long"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm32
index 18e7040..8e59eef 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_short" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_short"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm64
index 546b104..ca75370 100644
--- a/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_short.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_neg_short" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_neg_short"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm32
index 8608e3a..ef8f7b0 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_not_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_int"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %10 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm64
index bffdfb7..f796379 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_not_int.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_not_int" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_int"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm32
index caf5bee..07b900b 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_not_int4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_int4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm64
index f39c703..7eba008 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_not_int4.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_not_int4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_int4"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm32
index a94c8c5..f88819d 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_not_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_long"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm64
index 0cb309f..41dcc9f 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_not_long.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_not_long" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_long"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm32
index 15d5284..3718b91 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "op_not_short" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_short"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm64
index e250711..0d371c3 100644
--- a/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/op_not_short.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "op_not_short" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "op_not_short"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_double_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_double_simple.spvasm32
deleted file mode 100644
index 00cd989..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_double_simple.spvasm32
+++ /dev/null
@@ -1,36 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Float64
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.double*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 8
-               OpDecorate %double_0 SpecId 101
-     %double = OpTypeFloat 64
-%_ptr_UniformConstant_double = OpTypePointer UniformConstant %double
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
-   %double_0 = OpSpecConstant %double 0
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_double UniformConstant %double_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_double
-      %entry = OpLabel
-         %12 = OpLoad %double %output_value Aligned 8
-         %13 = OpLoad %double %spec_const_kernel_spec_constant_value Aligned 8
-        %add = OpFAdd %double %12 %13
-               OpStore %output_value %add Aligned 8
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_double_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_double_simple.spvasm64
deleted file mode 100644
index b50501b..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_double_simple.spvasm64
+++ /dev/null
@@ -1,36 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Float64
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.double*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 8
-               OpDecorate %double_0 SpecId 101
-     %double = OpTypeFloat 64
-%_ptr_UniformConstant_double = OpTypePointer UniformConstant %double
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
-   %double_0 = OpSpecConstant %double 0
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_double UniformConstant %double_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_double
-      %entry = OpLabel
-         %12 = OpLoad %double %output_value Aligned 8
-         %13 = OpLoad %double %spec_const_kernel_spec_constant_value Aligned 8
-        %add = OpFAdd %double %12 %13
-               OpStore %output_value %add Aligned 8
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_false_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_false_simple.spvasm32
deleted file mode 100644
index 8492474..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_false_simple.spvasm32
+++ /dev/null
@@ -1,55 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 28
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Linkage
-               OpCapability Kernel
-               OpCapability Int8
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %11 "spec_const_kernel"
-         %27 = OpString "kernel_arg_type.spec_const_kernel.uchar*,"
-               OpSource OpenCL_C 102000
-               OpName %test_value "test_value"
-               OpName %entry "entry"
-               OpName %output_value "output_value"
-               OpName %entry_0 "entry"
-               OpName %if_then "if.then"
-               OpName %if_end "if.end"
-               OpDecorate %test_value FuncParamAttr Zext
-               OpDecorate %test_value LinkageAttributes "test_value" Export
-               OpDecorate %false SpecId 101
-      %uchar = OpTypeInt 8 0
-       %uint = OpTypeInt 32 0
-    %uchar_0 = OpConstant %uchar 0
-    %uchar_1 = OpConstant %uchar 1
-     %uint_1 = OpConstant %uint 1
-       %bool = OpTypeBool
-          %3 = OpTypeFunction %bool
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
-         %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-      %false = OpSpecConstantFalse %bool
- %test_value = OpFunction %bool None %3
-      %entry = OpLabel
-               OpReturnValue %false
-               OpFunctionEnd
-         %11 = OpFunction %void None %10
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
-    %entry_0 = OpLabel
-       %call = OpFunctionCall %bool %test_value
-   %frombool = OpSelect %uchar %call %uchar_1 %uchar_0
-     %tobool = OpINotEqual %bool %frombool %uchar_0
-               OpBranchConditional %tobool %if_then %if_end
-    %if_then = OpLabel
-         %21 = OpLoad %uchar %output_value Aligned 1
-       %conv = OpUConvert %uint %21
-        %add = OpIAdd %uint %conv %uint_1
-      %conv1 = OpUConvert %uchar %add
-               OpStore %output_value %conv1 Aligned 1
-               OpBranch %if_end
-     %if_end = OpLabel
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_false_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_false_simple.spvasm64
deleted file mode 100644
index c76bccc..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_false_simple.spvasm64
+++ /dev/null
@@ -1,55 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 28
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Linkage
-               OpCapability Kernel
-               OpCapability Int8
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %11 "spec_const_kernel"
-         %27 = OpString "kernel_arg_type.spec_const_kernel.uchar*,"
-               OpSource OpenCL_C 102000
-               OpName %test_value "test_value"
-               OpName %entry "entry"
-               OpName %output_value "output_value"
-               OpName %entry_0 "entry"
-               OpName %if_then "if.then"
-               OpName %if_end "if.end"
-               OpDecorate %test_value FuncParamAttr Zext
-               OpDecorate %test_value LinkageAttributes "test_value" Export
-               OpDecorate %false SpecId 101
-      %uchar = OpTypeInt 8 0
-       %uint = OpTypeInt 32 0
-    %uchar_0 = OpConstant %uchar 0
-    %uchar_1 = OpConstant %uchar 1
-     %uint_1 = OpConstant %uint 1
-       %bool = OpTypeBool
-          %3 = OpTypeFunction %bool
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
-         %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-      %false = OpSpecConstantFalse %bool
- %test_value = OpFunction %bool None %3
-      %entry = OpLabel
-               OpReturnValue %false
-               OpFunctionEnd
-         %11 = OpFunction %void None %10
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
-    %entry_0 = OpLabel
-       %call = OpFunctionCall %bool %test_value
-   %frombool = OpSelect %uchar %call %uchar_1 %uchar_0
-     %tobool = OpINotEqual %bool %frombool %uchar_0
-               OpBranchConditional %tobool %if_then %if_end
-    %if_then = OpLabel
-         %21 = OpLoad %uchar %output_value Aligned 1
-       %conv = OpUConvert %uint %21
-        %add = OpIAdd %uint %conv %uint_1
-      %conv1 = OpUConvert %uchar %add
-               OpStore %output_value %conv1 Aligned 1
-               OpBranch %if_end
-     %if_end = OpLabel
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_float_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_float_simple.spvasm32
deleted file mode 100644
index b3163af..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_float_simple.spvasm32
+++ /dev/null
@@ -1,35 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.float*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 4
-               OpDecorate %float_0 SpecId 101
-      %float = OpTypeFloat 32
-%_ptr_UniformConstant_float = OpTypePointer UniformConstant %float
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
-    %float_0 = OpSpecConstant %float 0
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_float UniformConstant %float_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_float
-      %entry = OpLabel
-         %12 = OpLoad %float %output_value Aligned 4
-         %13 = OpLoad %float %spec_const_kernel_spec_constant_value Aligned 4
-        %add = OpFAdd %float %12 %13
-               OpStore %output_value %add Aligned 4
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_float_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_float_simple.spvasm64
deleted file mode 100644
index 006cdc7..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_float_simple.spvasm64
+++ /dev/null
@@ -1,35 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.float*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 4
-               OpDecorate %float_0 SpecId 101
-      %float = OpTypeFloat 32
-%_ptr_UniformConstant_float = OpTypePointer UniformConstant %float
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
-    %float_0 = OpSpecConstant %float 0
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_float UniformConstant %float_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_float
-      %entry = OpLabel
-         %12 = OpLoad %float %output_value Aligned 4
-         %13 = OpLoad %float %spec_const_kernel_spec_constant_value Aligned 4
-        %add = OpFAdd %float %12 %13
-               OpStore %output_value %add Aligned 4
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_half_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_half_simple.spvasm32
deleted file mode 100644
index 2195ebe..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_half_simple.spvasm32
+++ /dev/null
@@ -1,36 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Float16Buffer
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.half*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 2
-               OpDecorate %half_0x0p_0 SpecId 101
-       %half = OpTypeFloat 16
-%_ptr_UniformConstant_half = OpTypePointer UniformConstant %half
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_half
-%half_0x0p_0 = OpSpecConstant %half 0x0p+0
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_half UniformConstant %half_0x0p_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_half
-      %entry = OpLabel
-         %12 = OpLoad %half %output_value Aligned 2
-         %13 = OpLoad %half %spec_const_kernel_spec_constant_value Aligned 2
-        %add = OpFAdd %half %12 %13
-               OpStore %output_value %add Aligned 2
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_half_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_half_simple.spvasm64
deleted file mode 100644
index 47dc418..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_half_simple.spvasm64
+++ /dev/null
@@ -1,36 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Float16Buffer
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.half*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 2
-               OpDecorate %half_0x0p_0 SpecId 101
-       %half = OpTypeFloat 16
-%_ptr_UniformConstant_half = OpTypePointer UniformConstant %half
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_half
-%half_0x0p_0 = OpSpecConstant %half 0x0p+0
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_half UniformConstant %half_0x0p_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_half
-      %entry = OpLabel
-         %12 = OpLoad %half %output_value Aligned 2
-         %13 = OpLoad %half %spec_const_kernel_spec_constant_value Aligned 2
-        %add = OpFAdd %half %12 %13
-               OpStore %output_value %add Aligned 2
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_true_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_true_simple.spvasm32
deleted file mode 100644
index 834b85d..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_true_simple.spvasm32
+++ /dev/null
@@ -1,55 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 28
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Linkage
-               OpCapability Kernel
-               OpCapability Int8
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %11 "spec_const_kernel"
-         %27 = OpString "kernel_arg_type.spec_const_kernel.uchar*,"
-               OpSource OpenCL_C 102000
-               OpName %test_value "test_value"
-               OpName %entry "entry"
-               OpName %output_value "output_value"
-               OpName %entry_0 "entry"
-               OpName %if_then "if.then"
-               OpName %if_end "if.end"
-               OpDecorate %test_value FuncParamAttr Zext
-               OpDecorate %test_value LinkageAttributes "test_value" Export
-               OpDecorate %true SpecId 101
-      %uchar = OpTypeInt 8 0
-       %uint = OpTypeInt 32 0
-    %uchar_0 = OpConstant %uchar 0
-    %uchar_1 = OpConstant %uchar 1
-     %uint_1 = OpConstant %uint 1
-       %bool = OpTypeBool
-          %3 = OpTypeFunction %bool
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
-         %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-       %true = OpSpecConstantTrue %bool
- %test_value = OpFunction %bool None %3
-      %entry = OpLabel
-               OpReturnValue %true
-               OpFunctionEnd
-         %11 = OpFunction %void None %10
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
-    %entry_0 = OpLabel
-       %call = OpFunctionCall %bool %test_value
-   %frombool = OpSelect %uchar %call %uchar_1 %uchar_0
-     %tobool = OpINotEqual %bool %frombool %uchar_0
-               OpBranchConditional %tobool %if_end %if_then
-    %if_then = OpLabel
-         %21 = OpLoad %uchar %output_value Aligned 1
-       %conv = OpUConvert %uint %21
-        %add = OpIAdd %uint %conv %uint_1
-      %conv1 = OpUConvert %uchar %add
-               OpStore %output_value %conv1 Aligned 1
-               OpBranch %if_end
-     %if_end = OpLabel
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_true_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_true_simple.spvasm64
deleted file mode 100644
index 83ce4d6..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_true_simple.spvasm64
+++ /dev/null
@@ -1,55 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 28
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Linkage
-               OpCapability Kernel
-               OpCapability Int8
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %11 "spec_const_kernel"
-         %27 = OpString "kernel_arg_type.spec_const_kernel.uchar*,"
-               OpSource OpenCL_C 102000
-               OpName %test_value "test_value"
-               OpName %entry "entry"
-               OpName %output_value "output_value"
-               OpName %entry_0 "entry"
-               OpName %if_then "if.then"
-               OpName %if_end "if.end"
-               OpDecorate %test_value FuncParamAttr Zext
-               OpDecorate %test_value LinkageAttributes "test_value" Export
-               OpDecorate %true SpecId 101
-      %uchar = OpTypeInt 8 0
-       %uint = OpTypeInt 32 0
-    %uchar_0 = OpConstant %uchar 0
-    %uchar_1 = OpConstant %uchar 1
-     %uint_1 = OpConstant %uint 1
-       %bool = OpTypeBool
-          %3 = OpTypeFunction %bool
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
-         %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-       %true = OpSpecConstantTrue %bool
- %test_value = OpFunction %bool None %3
-      %entry = OpLabel
-               OpReturnValue %true
-               OpFunctionEnd
-         %11 = OpFunction %void None %10
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
-    %entry_0 = OpLabel
-       %call = OpFunctionCall %bool %test_value
-   %frombool = OpSelect %uchar %call %uchar_1 %uchar_0
-     %tobool = OpINotEqual %bool %frombool %uchar_0
-               OpBranchConditional %tobool %if_end %if_then
-    %if_then = OpLabel
-         %21 = OpLoad %uchar %output_value Aligned 1
-       %conv = OpUConvert %uint %21
-        %add = OpIAdd %uint %conv %uint_1
-      %conv1 = OpUConvert %uchar %add
-               OpStore %output_value %conv1 Aligned 1
-               OpBranch %if_end
-     %if_end = OpLabel
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uchar_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_uchar_simple.spvasm32
deleted file mode 100644
index 0e832b4..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uchar_simple.spvasm32
+++ /dev/null
@@ -1,37 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 20
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Int8
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %19 = OpString "kernel_arg_type.spec_const_kernel.uchar*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 1
-               OpDecorate %uchar_0 SpecId 101
-      %uchar = OpTypeInt 8 0
-       %uint = OpTypeInt 32 0
-    %uchar_0 = OpSpecConstant %uchar 0
-%_ptr_UniformConstant_uchar = OpTypePointer UniformConstant %uchar
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_uchar UniformConstant %uchar_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
-      %entry = OpLabel
-         %12 = OpLoad %uchar %output_value Aligned 1
-         %15 = OpLoad %uchar %spec_const_kernel_spec_constant_value Aligned 1
-        %add = OpIAdd %uchar %12 %15
-               OpStore %output_value %add Aligned 1
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uchar_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_uchar_simple.spvasm64
deleted file mode 100644
index 89150fc..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uchar_simple.spvasm64
+++ /dev/null
@@ -1,37 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 20
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Int8
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %19 = OpString "kernel_arg_type.spec_const_kernel.uchar*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 1
-               OpDecorate %uchar_0 SpecId 101
-      %uchar = OpTypeInt 8 0
-       %uint = OpTypeInt 32 0
-    %uchar_0 = OpSpecConstant %uchar 0
-%_ptr_UniformConstant_uchar = OpTypePointer UniformConstant %uchar
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_uchar UniformConstant %uchar_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
-      %entry = OpLabel
-         %12 = OpLoad %uchar %output_value Aligned 1
-         %15 = OpLoad %uchar %spec_const_kernel_spec_constant_value Aligned 1
-        %add = OpIAdd %uchar %12 %15
-               OpStore %output_value %add Aligned 1
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uint_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_uint_simple.spvasm32
deleted file mode 100644
index 7bf0f12..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uint_simple.spvasm32
+++ /dev/null
@@ -1,35 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.uint*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 4
-               OpDecorate %uint_0 SpecId 101
-       %uint = OpTypeInt 32 0
-     %uint_0 = OpSpecConstant %uint 0
-%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uint
-      %entry = OpLabel
-         %12 = OpLoad %uint %output_value Aligned 4
-         %13 = OpLoad %uint %spec_const_kernel_spec_constant_value Aligned 4
-        %add = OpIAdd %uint %12 %13
-               OpStore %output_value %add Aligned 4
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uint_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_uint_simple.spvasm64
deleted file mode 100644
index a73bf24..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_uint_simple.spvasm64
+++ /dev/null
@@ -1,35 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.uint*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 4
-               OpDecorate %uint_0 SpecId 101
-       %uint = OpTypeInt 32 0
-     %uint_0 = OpSpecConstant %uint 0
-%_ptr_UniformConstant_uint = OpTypePointer UniformConstant %uint
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_uint UniformConstant %uint_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_uint
-      %entry = OpLabel
-         %12 = OpLoad %uint %output_value Aligned 4
-         %13 = OpLoad %uint %spec_const_kernel_spec_constant_value Aligned 4
-        %add = OpIAdd %uint %12 %13
-               OpStore %output_value %add Aligned 4
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ulong_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_ulong_simple.spvasm32
deleted file mode 100644
index c4e2ef7..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ulong_simple.spvasm32
+++ /dev/null
@@ -1,36 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Int64
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.ulong*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 8
-               OpDecorate %ulong_0 SpecId 101
-      %ulong = OpTypeInt 64 0
-    %ulong_0 = OpSpecConstant %ulong 0
-%_ptr_UniformConstant_ulong = OpTypePointer UniformConstant %ulong
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_ulong UniformConstant %ulong_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
-      %entry = OpLabel
-         %12 = OpLoad %ulong %output_value Aligned 8
-         %13 = OpLoad %ulong %spec_const_kernel_spec_constant_value Aligned 8
-        %add = OpIAdd %ulong %12 %13
-               OpStore %output_value %add Aligned 8
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ulong_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_ulong_simple.spvasm64
deleted file mode 100644
index 3ec3a8f..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ulong_simple.spvasm64
+++ /dev/null
@@ -1,36 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 16
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Int64
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %15 = OpString "kernel_arg_type.spec_const_kernel.ulong*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpName %add "add"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 8
-               OpDecorate %ulong_0 SpecId 101
-      %ulong = OpTypeInt 64 0
-    %ulong_0 = OpSpecConstant %ulong 0
-%_ptr_UniformConstant_ulong = OpTypePointer UniformConstant %ulong
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_ulong UniformConstant %ulong_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
-      %entry = OpLabel
-         %12 = OpLoad %ulong %output_value Aligned 8
-         %13 = OpLoad %ulong %spec_const_kernel_spec_constant_value Aligned 8
-        %add = OpIAdd %ulong %12 %13
-               OpStore %output_value %add Aligned 8
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ushort_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_ushort_simple.spvasm32
deleted file mode 100644
index d501207..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ushort_simple.spvasm32
+++ /dev/null
@@ -1,36 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 20
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Int16
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %19 = OpString "kernel_arg_type.spec_const_kernel.ushort*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 2
-               OpDecorate %ushort_0 SpecId 101
-     %ushort = OpTypeInt 16 0
-       %uint = OpTypeInt 32 0
-   %ushort_0 = OpSpecConstant %ushort 0
-%_ptr_UniformConstant_ushort = OpTypePointer UniformConstant %ushort
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_ushort UniformConstant %ushort_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
-      %entry = OpLabel
-         %12 = OpLoad %ushort %output_value Aligned 2
-         %15 = OpLoad %ushort %spec_const_kernel_spec_constant_value Aligned 2
-        %add = OpIAdd %ushort %12 %15
-               OpStore %output_value %add Aligned 2
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ushort_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_spec_constant_ushort_simple.spvasm64
deleted file mode 100644
index 27f5fb8..0000000
--- a/test_conformance/spirv_new/spirv_asm/op_spec_constant_ushort_simple.spvasm64
+++ /dev/null
@@ -1,36 +0,0 @@
-; Version: 1.0
-; Generator: Khronos LLVM/SPIR-V Translator; 14
-; Bound: 20
-; Schema: 0
-               OpCapability Addresses
-               OpCapability Kernel
-               OpCapability Int16
-          %1 = OpExtInstImport "OpenCL.std"
-               OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %9 "spec_const_kernel"
-         %19 = OpString "kernel_arg_type.spec_const_kernel.ushort*,"
-               OpSource OpenCL_C 102000
-               OpName %spec_const_kernel_spec_constant_value "spec_const_kernel.spec_constant_value"
-               OpName %output_value "output_value"
-               OpName %entry "entry"
-               OpDecorate %spec_const_kernel_spec_constant_value Constant
-               OpDecorate %spec_const_kernel_spec_constant_value Alignment 2
-               OpDecorate %ushort_0 SpecId 101
-     %ushort = OpTypeInt 16 0
-       %uint = OpTypeInt 32 0
-   %ushort_0 = OpSpecConstant %ushort 0
-%_ptr_UniformConstant_ushort = OpTypePointer UniformConstant %ushort
-       %void = OpTypeVoid
-%_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
-          %8 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
-%spec_const_kernel_spec_constant_value = OpVariable %_ptr_UniformConstant_ushort UniformConstant %ushort_0
-          %9 = OpFunction %void None %8
-%output_value = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
-      %entry = OpLabel
-         %12 = OpLoad %ushort %output_value Aligned 2
-         %15 = OpLoad %ushort %spec_const_kernel_spec_constant_value Aligned 2
-        %add = OpIAdd %ushort %12 %15
-               OpStore %output_value %add Aligned 2
-               OpReturn
-               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/opaque.spvasm32 b/test_conformance/spirv_new/spirv_asm/opaque.spvasm32
index e9a0a5a..ef133c1 100644
--- a/test_conformance/spirv_new/spirv_asm/opaque.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/opaque.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "opaque" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "opaque"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %4 LinkageAttributes "opaque_store" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %Opaque_opaque_t = OpTypeOpaque "opaque_t"
 %_ptr_CrossWorkgroup_Opaque_opaque_t = OpTypePointer CrossWorkgroup %Opaque_opaque_t
 %float_3_14159274 = OpConstant %float 3.14159274
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_Opaque_opaque_t
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_Opaque_opaque_t %uint %float
           %4 = OpFunction %void None %14
          %15 = OpFunctionParameter %_ptr_CrossWorkgroup_Opaque_opaque_t
diff --git a/test_conformance/spirv_new/spirv_asm/opaque.spvasm64 b/test_conformance/spirv_new/spirv_asm/opaque.spvasm64
index 0759c28..50ef5a6 100644
--- a/test_conformance/spirv_new/spirv_asm/opaque.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/opaque.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "opaque" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "opaque"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,12 +17,12 @@
                OpDecorate %4 LinkageAttributes "opaque_store" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
    %ulong_32 = OpConstant %ulong 32
 %float_3_14159274 = OpConstant %float 3.14159274
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
 %Opaque_opaque_t = OpTypeOpaque "opaque_t"
 %_ptr_CrossWorkgroup_Opaque_opaque_t = OpTypePointer CrossWorkgroup %Opaque_opaque_t
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_Opaque_opaque_t
diff --git a/test_conformance/spirv_new/spirv_asm/phi_2.spvasm32 b/test_conformance/spirv_new/spirv_asm/phi_2.spvasm32
index 7a6311a..adcfac7 100644
--- a/test_conformance/spirv_new/spirv_asm/phi_2.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/phi_2.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "phi_2" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "phi_2"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,12 +19,12 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/phi_2.spvasm64 b/test_conformance/spirv_new/spirv_asm/phi_2.spvasm64
index 538c5f7..86ef7c3 100644
--- a/test_conformance/spirv_new/spirv_asm/phi_2.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/phi_2.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "phi_2" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "phi_2"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,14 +20,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/phi_3.spvasm32 b/test_conformance/spirv_new/spirv_asm/phi_3.spvasm32
index fef5c24..61ce608 100644
--- a/test_conformance/spirv_new/spirv_asm/phi_3.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/phi_3.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "phi_3" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "phi_3"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,13 +19,13 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/phi_3.spvasm64 b/test_conformance/spirv_new/spirv_asm/phi_3.spvasm64
index 9ea0fc8..4029dd6 100644
--- a/test_conformance/spirv_new/spirv_asm/phi_3.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/phi_3.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "phi_3" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "phi_3"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,14 +21,14 @@
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
    %ulong_32 = OpConstant %ulong 32
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/phi_4.spvasm32 b/test_conformance/spirv_new/spirv_asm/phi_4.spvasm32
index 575ff3d..3ed6808 100644
--- a/test_conformance/spirv_new/spirv_asm/phi_4.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/phi_4.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "phi_4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "phi_4"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,13 +19,13 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %14
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/phi_4.spvasm64 b/test_conformance/spirv_new/spirv_asm/phi_4.spvasm64
index ffa1fc1..ab07cad 100644
--- a/test_conformance/spirv_new/spirv_asm/phi_4.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/phi_4.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "phi_4" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "phi_4"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %bool = OpTypeBool
        %void = OpTypeVoid
@@ -28,7 +28,7 @@
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm32 b/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm32
index 53883e3..d3baef2 100644
--- a/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "select_if_dont_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_if_dont_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm64 b/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm64
index 5939b71..5dd2d99 100644
--- a/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/select_if_dont_flatten.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "select_if_dont_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_if_dont_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -29,7 +29,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm32 b/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm32
index 0b05732..3e9fa20 100644
--- a/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "select_if_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_if_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm64 b/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm64
index e3e4cf9..f246ece 100644
--- a/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/select_if_flatten.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "select_if_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_if_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -29,7 +29,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm32 b/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm32
index b821788..1aca937 100644
--- a/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "select_if_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_if_none"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,14 +19,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm64 b/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm64
index 6a45bce..47526e9 100644
--- a/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/select_if_none.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "select_if_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_if_none"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -29,7 +29,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %17 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %17
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm32 b/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm32
index fa459f7..b1b6e04 100644
--- a/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "select_switch_dont_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_switch_dont_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,7 +19,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -30,7 +30,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm64 b/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm64
index cae44be..47b97b9 100644
--- a/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/select_switch_dont_flatten.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "select_switch_dont_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_switch_dont_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -33,7 +33,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %21 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %21
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm32 b/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm32
index 441fafb..3684162 100644
--- a/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "select_switch_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_switch_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,7 +19,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -30,7 +30,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm64 b/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm64
index 9e56514..1cbfc28 100644
--- a/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/select_switch_flatten.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "select_switch_flatten" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_switch_flatten"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -33,7 +33,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %21 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %21
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm32 b/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm32
index ddaf22a..899d64e 100644
--- a/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "select_switch_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_switch_none"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -19,7 +19,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %bool = OpTypeBool
        %void = OpTypeVoid
      %uint_0 = OpConstant %uint 0
@@ -30,7 +30,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %19 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %19
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm64 b/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm64
index 27a3983..c905595 100644
--- a/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/select_switch_none.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "select_switch_none" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "select_switch_none"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,7 +20,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %bool = OpTypeBool
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
@@ -33,7 +33,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_Function_uint = OpTypePointer Function %uint
          %21 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %21
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm32
index ad77b2a..fb4c78b 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm64
index 4e4768d..65d657e 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_char_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm32
index 631c179..ed4a47d 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_double_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_double_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm64
index 6e30116..6ae714d 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_double_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_double_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_double_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_double
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm32
index 8886283..d80afe1 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_false_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_false_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,14 +15,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %bool = OpTypeBool
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm64
index 4b781b1..99fc3f6 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_false_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_false_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_false_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
        %bool = OpTypeBool
@@ -25,7 +25,7 @@
    %ulong_32 = OpConstant %ulong 32
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %15 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm32
index 93a69a8..5b20017 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm64
index f2e1d24..ec56354 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_float_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm32
index 6f05643..acebc88 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_half_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_half_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm64
index eff63f8..5f7ffd3 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_half_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_half_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_half_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %float = OpTypeFloat 32
        %half = OpTypeFloat 16
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_float
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm32
index 7839d45..6247618 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_int3_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_int3_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_v3uint = OpTypePointer CrossWorkgroup %v3uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_v3uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v3uint
          %10 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm64
index 7369f2f..73b543d 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_int3_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_int3_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_int3_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
 %_ptr_CrossWorkgroup_v3uint = OpTypePointer CrossWorkgroup %v3uint
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_v3uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v3uint
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm32
index 6f30a9b..75f83b5 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_int4_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_int4_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,12 +15,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm64
index d42a157..6d0f940 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_int4_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_int4_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_int4_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm32
index dfa8ab4..f9382c3 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_int_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_int_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %10 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm64
index 0aecf10..7d6f8ba 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_int_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_int_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_int_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm32
index 759017b..b05e322 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_long_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_long_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm64
index 94b6d21..e67626d 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_long_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_long_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_long_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm32
index ab7904c..c4fb091 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_short_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_short_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm64
index 0d31d92..9078c85 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_short_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_short_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_short_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm32
index ddb9bf4..a9527fa 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Linkage
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_struct_int_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_struct_int_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
   %_struct_9 = OpTypeStruct %uint %uchar
 %_ptr_CrossWorkgroup__struct_9 = OpTypePointer CrossWorkgroup %_struct_9
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup__struct_9
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_9
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm64
index 41f7856..19185c3 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_struct_int_char_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_struct_int_char_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_struct_int_char_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
       %uchar = OpTypeInt 8 0
@@ -25,7 +25,7 @@
 %_ptr_CrossWorkgroup__struct_10 = OpTypePointer CrossWorkgroup %_struct_10
          %12 = OpTypeFunction %void %_ptr_CrossWorkgroup__struct_10
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_10
          %14 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm32
index cad9452..69750ca 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Kernel
                OpCapability Linkage
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_struct_int_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_struct_int_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,13 +15,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
   %_struct_9 = OpTypeStruct %uint %float
 %_ptr_CrossWorkgroup__struct_9 = OpTypePointer CrossWorkgroup %_struct_9
          %11 = OpTypeFunction %void %_ptr_CrossWorkgroup__struct_9
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_9
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm64
index cdeadaa..b748de8 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_struct_int_float_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_struct_int_float_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_struct_int_float_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
       %float = OpTypeFloat 32
@@ -24,7 +24,7 @@
 %_ptr_CrossWorkgroup__struct_10 = OpTypePointer CrossWorkgroup %_struct_10
          %12 = OpTypeFunction %void %_ptr_CrossWorkgroup__struct_10
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_10
          %14 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm32
index d3605a5..e2812a6 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Linkage
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_struct_struct_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_struct_struct_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v2uint = OpTypeVector %uint 2
       %uchar = OpTypeInt 8 0
@@ -24,7 +24,7 @@
  %_struct_11 = OpTypeStruct %v2uint %_struct_10
 %_ptr_CrossWorkgroup__struct_11 = OpTypePointer CrossWorkgroup %_struct_11
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup__struct_11
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_11
          %14 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm64
index c46f254..35d05d0 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_struct_struct_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_struct_struct_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_struct_struct_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
      %v2uint = OpTypeVector %uint 2
@@ -27,7 +27,7 @@
 %_ptr_CrossWorkgroup__struct_12 = OpTypePointer CrossWorkgroup %_struct_12
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup__struct_12
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup__struct_12
          %16 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm32
index fa4854b..5b356cd 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_true_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_true_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,14 +15,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
        %bool = OpTypeBool
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %13 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm64
index 352e69c..9bf35d7 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_true_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_true_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_true_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,7 +16,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
        %bool = OpTypeBool
@@ -25,7 +25,7 @@
    %ulong_32 = OpConstant %ulong 32
      %uint_1 = OpConstant %uint 1
      %uint_0 = OpConstant %uint 0
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %11
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %15 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm32
index d16cb50..868ccfc 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_uchar_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_uchar_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm64
index 4903b9c..4060237 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_uchar_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int8
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_uchar_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_uchar_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm32
index 65f38e0..e281b55 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_uint_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_uint_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %10 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm64
index bc27d85..1193428 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_uint_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_uint_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_uint_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm32
index fb343c3..8e79a65 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_ulong_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_ulong_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm64
index 953a0c9..bc595e4 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_ulong_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_ulong_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_ulong_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm32
index ce8380b..5425638 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "undef_ushort_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_ushort_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,12 +16,12 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %11 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm64
index 8fd17af..0dff1dd 100644
--- a/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/undef_ushort_simple.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Int16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "undef_ushort_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "undef_ushort_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
        %void = OpTypeVoid
      %ushort = OpTypeInt 16 0
 %_ptr_CrossWorkgroup_ushort = OpTypePointer CrossWorkgroup %ushort
          %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_ushort
    %ulong_32 = OpConstant %ulong 32
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %10
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ushort
          %12 = OpLabel
diff --git a/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm32 b/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm32
index 20e3a70..1c40b60 100644
--- a/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "unreachable_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "unreachable_simple"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -15,11 +15,11 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
           %9 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %9
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
          %10 = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm64 b/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm64
index 0c71180..eefd816 100644
--- a/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/unreachable_simple.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "unreachable_simple" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "unreachable_simple"
                OpName %in "in"
                OpName %out "out"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
@@ -17,13 +17,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %void = OpTypeVoid
        %uint = OpTypeInt 32 0
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
          %12 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
         %out = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm32
index 48fb4ea..9ea1871 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Vector16
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_char16_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_char16_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -18,14 +18,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
    %v16uchar = OpTypeVector %uchar 16
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
 %_ptr_CrossWorkgroup_v16uchar = OpTypePointer CrossWorkgroup %v16uchar
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_v16uchar %_ptr_CrossWorkgroup_uchar %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v16uchar
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
diff --git a/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm64
index 16f38a9..ce4d72c 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_char16_extract.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Int8
                OpCapability Vector16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_char16_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_char16_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -19,7 +19,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
 %_ptr_CrossWorkgroup_v16uchar = OpTypePointer CrossWorkgroup %v16uchar
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v16uchar %_ptr_CrossWorkgroup_uchar %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v16uchar
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
diff --git a/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm32
index 709161f..0ccbbf9 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm32
@@ -9,7 +9,7 @@
                OpCapability Vector16
                OpCapability Int8
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_char16_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_char16_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -18,14 +18,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %uchar = OpTypeInt 8 0
    %v16uchar = OpTypeVector %uchar 16
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
 %_ptr_CrossWorkgroup_v16uchar = OpTypePointer CrossWorkgroup %v16uchar
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_v16uchar %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v16uchar
diff --git a/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm64
index b5ef30b..dbb7cf5 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_char16_insert.spvasm64
@@ -10,7 +10,7 @@
                OpCapability Int8
                OpCapability Vector16
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_char16_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_char16_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -19,7 +19,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -28,7 +28,7 @@
 %_ptr_CrossWorkgroup_uchar = OpTypePointer CrossWorkgroup %uchar
 %_ptr_CrossWorkgroup_v16uchar = OpTypePointer CrossWorkgroup %v16uchar
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_uchar %_ptr_CrossWorkgroup_v16uchar %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uchar
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v16uchar
diff --git a/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm32
index ec19d0c..559337c 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_double2_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_double2_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_double %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm64
index 0a6f245..ccf2fb4 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_double2_extract.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_double2_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_double2_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -18,7 +18,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -27,7 +27,7 @@
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2double %_ptr_CrossWorkgroup_double %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_double
diff --git a/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm32
index b434804..1d4125a 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_double2_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_double2_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
    %v2double = OpTypeVector %double 2
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_v2double %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
diff --git a/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm64
index 126f591..48abb07 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_double2_insert.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_double2_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_double2_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -18,7 +18,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -27,7 +27,7 @@
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
 %_ptr_CrossWorkgroup_v2double = OpTypePointer CrossWorkgroup %v2double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_double %_ptr_CrossWorkgroup_v2double %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_double
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v2double
diff --git a/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm32
index 62fd998..c3a1d9d 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_float4_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_float4_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_float %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm64
index fbec1e3..e7df3b0 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_float4_extract.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_float4_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_float4_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -26,7 +26,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_float %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_float
diff --git a/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm32
index 16993f5..7cc0d21 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_float4_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_float4_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,14 +16,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_v4float %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm64
index e113bb5..24305d2 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_float4_insert.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_float4_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_float4_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -26,7 +26,7 @@
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_float %_ptr_CrossWorkgroup_v4float %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %15
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_float
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm32
index f64b0bb..441623f 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_int4_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_int4_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %12 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %_ptr_CrossWorkgroup_uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm64
index f9903e1..5298a51 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_int4_extract.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_int4_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_int4_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -25,7 +25,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4uint %_ptr_CrossWorkgroup_uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_uint
diff --git a/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm32
index 265b9cc..e0ae874 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm32
@@ -7,7 +7,7 @@
                OpCapability Linkage
                OpCapability Kernel
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_int4_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_int4_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -16,13 +16,13 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %v4uint = OpTypeVector %uint 4
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %12 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_v4uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %12
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
diff --git a/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm64
index be2f94e..426ace1 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_int4_insert.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_int4_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_int4_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -25,7 +25,7 @@
 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
 %_ptr_CrossWorkgroup_v4uint = OpTypePointer CrossWorkgroup %v4uint
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint %_ptr_CrossWorkgroup_v4uint %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_uint
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v4uint
diff --git a/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm32
index e925ec2..22925a0 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_long2_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_long2_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
     %v2ulong = OpTypeVector %ulong 2
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
 %_ptr_CrossWorkgroup_v2ulong = OpTypePointer CrossWorkgroup %v2ulong
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2ulong %_ptr_CrossWorkgroup_ulong %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v2ulong
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
diff --git a/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm64
index b1c9562..5ebac41 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_long2_extract.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_long2_extract" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_long2_extract"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -25,7 +25,7 @@
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
 %_ptr_CrossWorkgroup_v2ulong = OpTypePointer CrossWorkgroup %v2ulong
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_v2ulong %_ptr_CrossWorkgroup_ulong %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_v2ulong
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
diff --git a/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm32
index 49bc9f6..d9cafab 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_long2_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_long2_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,14 +17,14 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %ulong = OpTypeInt 64 0
     %v2ulong = OpTypeVector %ulong 2
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
 %_ptr_CrossWorkgroup_v2ulong = OpTypePointer CrossWorkgroup %v2ulong
          %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_v2ulong %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %13
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v2ulong
diff --git a/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm64
index 74ae94c..f37e03d 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_long2_insert.spvasm64
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_long2_insert" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_long2_insert"
                OpName %in "in"
                OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
                OpDecorate %gl_GlobalInvocationID Constant
@@ -17,7 +17,7 @@
                OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %uint = OpTypeInt 32 0
        %void = OpTypeVoid
@@ -25,7 +25,7 @@
 %_ptr_CrossWorkgroup_ulong = OpTypePointer CrossWorkgroup %ulong
 %_ptr_CrossWorkgroup_v2ulong = OpTypePointer CrossWorkgroup %v2ulong
          %14 = OpTypeFunction %void %_ptr_CrossWorkgroup_ulong %_ptr_CrossWorkgroup_v2ulong %uint
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %14
          %in = OpFunctionParameter %_ptr_CrossWorkgroup_ulong
           %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v2ulong
diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm32
index ec2c415..24d5b2a 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Float64
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_times_scalar"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,14 +20,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
      %double = OpTypeFloat 64
 %_ptr_CrossWorkgroup_double = OpTypePointer CrossWorkgroup %double
    %v4double = OpTypeVector %double 4
 %_ptr_CrossWorkgroup_v4double = OpTypePointer CrossWorkgroup %v4double
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4double %_ptr_CrossWorkgroup_v4double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4double
diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm64
index cd3755e..43e6487 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_double.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int64
                OpCapability Float64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_times_scalar"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,7 +21,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %void = OpTypeVoid
      %double = OpTypeFloat 64
@@ -29,7 +29,7 @@
    %v4double = OpTypeVector %double 4
 %_ptr_CrossWorkgroup_v4double = OpTypePointer CrossWorkgroup %v4double
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4double %_ptr_CrossWorkgroup_v4double %_ptr_CrossWorkgroup_double
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4double
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4double
diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm32
index 5c8b337..7e44936 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm32
+++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm32
@@ -8,7 +8,7 @@
                OpCapability Kernel
                OpCapability Int16
                OpMemoryModel Physical32 OpenCL
-               OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_times_scalar"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -20,14 +20,14 @@
                OpGroupDecorate %5 %res %lhs %rhs
        %uint = OpTypeInt 32 0
      %v3uint = OpTypeVector %uint 3
-%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_UniformConstant_v3uint = OpTypePointer UniformConstant %v3uint
        %void = OpTypeVoid
       %float = OpTypeFloat 32
 %_ptr_CrossWorkgroup_float = OpTypePointer CrossWorkgroup %float
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3uint UniformConstant
           %1 = OpFunction %void None %15
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm64
index ecc078b..0fbd038 100644
--- a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm64
+++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_float.spvasm64
@@ -9,7 +9,7 @@
                OpCapability Int16
                OpCapability Int64
                OpMemoryModel Physical64 OpenCL
-               OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID
+               OpEntryPoint Kernel %1 "vector_times_scalar"
                OpName %res "res"
                OpName %lhs "lhs"
                OpName %rhs "rhs"
@@ -21,7 +21,7 @@
                OpGroupDecorate %5 %res %lhs %rhs
       %ulong = OpTypeInt 64 0
     %v3ulong = OpTypeVector %ulong 3
-%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+%_ptr_UniformConstant_v3ulong = OpTypePointer UniformConstant %v3ulong
    %ulong_32 = OpConstant %ulong 32
        %void = OpTypeVoid
       %float = OpTypeFloat 32
@@ -29,7 +29,7 @@
     %v4float = OpTypeVector %float 4
 %_ptr_CrossWorkgroup_v4float = OpTypePointer CrossWorkgroup %v4float
          %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_v4float %_ptr_CrossWorkgroup_float
-%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+%gl_GlobalInvocationID = OpVariable %_ptr_UniformConstant_v3ulong UniformConstant
           %1 = OpFunction %void None %16
         %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
         %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4float
diff --git a/test_conformance/spirv_new/spirv_bin/atomic_dec_global.spv32 b/test_conformance/spirv_new/spirv_bin/atomic_dec_global.spv32
new file mode 100644
index 0000000..9f07a61
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/atomic_dec_global.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/atomic_dec_global.spv64 b/test_conformance/spirv_new/spirv_bin/atomic_dec_global.spv64
new file mode 100644
index 0000000..d590b8b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/atomic_dec_global.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/atomic_inc_global.spv32 b/test_conformance/spirv_new/spirv_bin/atomic_inc_global.spv32
new file mode 100644
index 0000000..056b457
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/atomic_inc_global.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/atomic_inc_global.spv64 b/test_conformance/spirv_new/spirv_bin/atomic_inc_global.spv64
new file mode 100644
index 0000000..a049df8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/atomic_inc_global.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/branch_conditional.spv32 b/test_conformance/spirv_new/spirv_bin/branch_conditional.spv32
new file mode 100644
index 0000000..2f731ac
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/branch_conditional.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/branch_conditional.spv64 b/test_conformance/spirv_new/spirv_bin/branch_conditional.spv64
new file mode 100644
index 0000000..cf1b875
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/branch_conditional.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/branch_conditional_weighted.spv32 b/test_conformance/spirv_new/spirv_bin/branch_conditional_weighted.spv32
new file mode 100644
index 0000000..a0eb511
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/branch_conditional_weighted.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/branch_conditional_weighted.spv64 b/test_conformance/spirv_new/spirv_bin/branch_conditional_weighted.spv64
new file mode 100644
index 0000000..bb1520f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/branch_conditional_weighted.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/branch_simple.spv32 b/test_conformance/spirv_new/spirv_bin/branch_simple.spv32
new file mode 100644
index 0000000..c3c7103
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/branch_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/branch_simple.spv64 b/test_conformance/spirv_new/spirv_bin/branch_simple.spv64
new file mode 100644
index 0000000..a5795b5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/branch_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/composite_construct_int4.spv32 b/test_conformance/spirv_new/spirv_bin/composite_construct_int4.spv32
new file mode 100644
index 0000000..a17daa5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/composite_construct_int4.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/composite_construct_int4.spv64 b/test_conformance/spirv_new/spirv_bin/composite_construct_int4.spv64
new file mode 100644
index 0000000..279a217
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/composite_construct_int4.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/composite_construct_struct.spv32 b/test_conformance/spirv_new/spirv_bin/composite_construct_struct.spv32
new file mode 100644
index 0000000..4f881c5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/composite_construct_struct.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/composite_construct_struct.spv64 b/test_conformance/spirv_new/spirv_bin/composite_construct_struct.spv64
new file mode 100644
index 0000000..22a21cd
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/composite_construct_struct.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_char_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_char_simple.spv32
new file mode 100644
index 0000000..7f0a8fe
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_char_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_char_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_char_simple.spv64
new file mode 100644
index 0000000..0a9b667
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_char_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_double_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_double_simple.spv32
new file mode 100644
index 0000000..bb6b2bc
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_double_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_double_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_double_simple.spv64
new file mode 100644
index 0000000..4cb895c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_double_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_false_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_false_simple.spv32
new file mode 100644
index 0000000..e4602a2
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_false_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_false_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_false_simple.spv64
new file mode 100644
index 0000000..302ac27
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_false_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_float_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_float_simple.spv32
new file mode 100644
index 0000000..a97471b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_float_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_float_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_float_simple.spv64
new file mode 100644
index 0000000..2891abf
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_float_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_half_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_half_simple.spv32
new file mode 100644
index 0000000..84c8019
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_half_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_half_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_half_simple.spv64
new file mode 100644
index 0000000..a05102a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_half_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_int3_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_int3_simple.spv32
new file mode 100644
index 0000000..879ed90
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_int3_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_int3_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_int3_simple.spv64
new file mode 100644
index 0000000..ea5580e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_int3_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_int4_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_int4_simple.spv32
new file mode 100644
index 0000000..4eb2ff5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_int4_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_int4_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_int4_simple.spv64
new file mode 100644
index 0000000..e4aeb68
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_int4_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_int_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_int_simple.spv32
new file mode 100644
index 0000000..34ed429
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_int_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_int_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_int_simple.spv64
new file mode 100644
index 0000000..ce03e97
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_int_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_long_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_long_simple.spv32
new file mode 100644
index 0000000..b99b03f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_long_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_long_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_long_simple.spv64
new file mode 100644
index 0000000..04a00d9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_long_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_short_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_short_simple.spv32
new file mode 100644
index 0000000..b121db4
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_short_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_short_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_short_simple.spv64
new file mode 100644
index 0000000..3f23030
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_short_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_struct_int_char_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_struct_int_char_simple.spv32
new file mode 100644
index 0000000..4b57e43
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_struct_int_char_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_struct_int_char_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_struct_int_char_simple.spv64
new file mode 100644
index 0000000..39a461c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_struct_int_char_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_struct_int_float_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_struct_int_float_simple.spv32
new file mode 100644
index 0000000..7a9b688
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_struct_int_float_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_struct_int_float_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_struct_int_float_simple.spv64
new file mode 100644
index 0000000..b77f547
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_struct_int_float_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_struct_struct_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_struct_struct_simple.spv32
new file mode 100644
index 0000000..853af31
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_struct_struct_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_struct_struct_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_struct_struct_simple.spv64
new file mode 100644
index 0000000..98ee089
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_struct_struct_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_true_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_true_simple.spv32
new file mode 100644
index 0000000..582afa6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_true_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_true_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_true_simple.spv64
new file mode 100644
index 0000000..1f2306d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_true_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_uchar_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_uchar_simple.spv32
new file mode 100644
index 0000000..1b63faf
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_uchar_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_uchar_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_uchar_simple.spv64
new file mode 100644
index 0000000..8bf38d0
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_uchar_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_uint_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_uint_simple.spv32
new file mode 100644
index 0000000..2fa31dd
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_uint_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_uint_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_uint_simple.spv64
new file mode 100644
index 0000000..eb8ea01
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_uint_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_ulong_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_ulong_simple.spv32
new file mode 100644
index 0000000..29a76a1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_ulong_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_ulong_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_ulong_simple.spv64
new file mode 100644
index 0000000..116ebc8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_ulong_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_ushort_simple.spv32 b/test_conformance/spirv_new/spirv_bin/constant_ushort_simple.spv32
new file mode 100644
index 0000000..0fbde36
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_ushort_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/constant_ushort_simple.spv64 b/test_conformance/spirv_new/spirv_bin/constant_ushort_simple.spv64
new file mode 100644
index 0000000..1f35a8d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/constant_ushort_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_char_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_char_simple.spv32
new file mode 100644
index 0000000..23334e0
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_char_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_char_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_char_simple.spv64
new file mode 100644
index 0000000..34c16d9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_char_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_double_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_double_simple.spv32
new file mode 100644
index 0000000..a04ee05
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_double_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_double_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_double_simple.spv64
new file mode 100644
index 0000000..7fa5ca4
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_double_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_float_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_float_simple.spv32
new file mode 100644
index 0000000..8f5f606
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_float_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_float_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_float_simple.spv64
new file mode 100644
index 0000000..3775784
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_float_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_half_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_half_simple.spv32
new file mode 100644
index 0000000..84eba79
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_half_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_half_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_half_simple.spv64
new file mode 100644
index 0000000..3943a52
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_half_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_int3_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_int3_simple.spv32
new file mode 100644
index 0000000..609c8b2
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_int3_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_int3_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_int3_simple.spv64
new file mode 100644
index 0000000..485fb3c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_int3_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_int4_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_int4_simple.spv32
new file mode 100644
index 0000000..9865162
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_int4_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_int4_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_int4_simple.spv64
new file mode 100644
index 0000000..1a4e94e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_int4_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_int_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_int_simple.spv32
new file mode 100644
index 0000000..701a9e9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_int_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_int_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_int_simple.spv64
new file mode 100644
index 0000000..61af430
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_int_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_long_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_long_simple.spv32
new file mode 100644
index 0000000..daf3209
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_long_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_long_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_long_simple.spv64
new file mode 100644
index 0000000..4848064
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_long_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_short_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_short_simple.spv32
new file mode 100644
index 0000000..cd6b9fe
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_short_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_short_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_short_simple.spv64
new file mode 100644
index 0000000..a706048
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_short_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_struct_int_char_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_struct_int_char_simple.spv32
new file mode 100644
index 0000000..2ce64ab
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_struct_int_char_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_struct_int_char_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_struct_int_char_simple.spv64
new file mode 100644
index 0000000..0e9de18
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_struct_int_char_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_struct_int_float_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_struct_int_float_simple.spv32
new file mode 100644
index 0000000..811b282
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_struct_int_float_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_struct_int_float_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_struct_int_float_simple.spv64
new file mode 100644
index 0000000..832efe7
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_struct_int_float_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_struct_struct_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_struct_struct_simple.spv32
new file mode 100644
index 0000000..7a1d6ed
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_struct_struct_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_struct_struct_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_struct_struct_simple.spv64
new file mode 100644
index 0000000..c027f9c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_struct_struct_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_uchar_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_uchar_simple.spv32
new file mode 100644
index 0000000..5416c5d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_uchar_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_uchar_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_uchar_simple.spv64
new file mode 100644
index 0000000..ab6c827
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_uchar_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_uint_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_uint_simple.spv32
new file mode 100644
index 0000000..053bf6c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_uint_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_uint_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_uint_simple.spv64
new file mode 100644
index 0000000..ca83456
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_uint_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_ulong_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_ulong_simple.spv32
new file mode 100644
index 0000000..8dc0519
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_ulong_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_ulong_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_ulong_simple.spv64
new file mode 100644
index 0000000..b3434bb
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_ulong_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_ushort_simple.spv32 b/test_conformance/spirv_new/spirv_bin/copy_ushort_simple.spv32
new file mode 100644
index 0000000..3c1045b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_ushort_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/copy_ushort_simple.spv64 b/test_conformance/spirv_new/spirv_bin/copy_ushort_simple.spv64
new file mode 100644
index 0000000..08db854
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/copy_ushort_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_aliased.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_aliased.spv32
new file mode 100644
index 0000000..5c0444a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_aliased.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_aliased.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_aliased.spv64
new file mode 100644
index 0000000..6a13de3
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_aliased.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_alignment.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_alignment.spv32
new file mode 100644
index 0000000..c5ef2f2
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_alignment.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_alignment.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_alignment.spv64
new file mode 100644
index 0000000..d2b5508
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_alignment.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_coherent.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_coherent.spv32
new file mode 100644
index 0000000..6fb0279
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_coherent.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_coherent.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_coherent.spv64
new file mode 100644
index 0000000..267c0fd
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_coherent.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_constant.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_constant.spv32
new file mode 100644
index 0000000..cda1bc9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_constant.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_constant.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_constant.spv64
new file mode 100644
index 0000000..b9fa920
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_constant.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_constant_fail.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_constant_fail.spv32
new file mode 100644
index 0000000..9ef85b5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_constant_fail.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_constant_fail.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_constant_fail.spv64
new file mode 100644
index 0000000..67700c1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_constant_fail.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_cpacked.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_cpacked.spv32
new file mode 100644
index 0000000..6872389
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_cpacked.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_cpacked.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_cpacked.spv64
new file mode 100644
index 0000000..9fa9b9a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_cpacked.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_nonreadable.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_nonreadable.spv32
new file mode 100644
index 0000000..fae2ef6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_nonreadable.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_nonreadable.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_nonreadable.spv64
new file mode 100644
index 0000000..adbca68
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_nonreadable.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_nonwritable.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_nonwritable.spv32
new file mode 100644
index 0000000..1b6ea15
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_nonwritable.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_nonwritable.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_nonwritable.spv64
new file mode 100644
index 0000000..a9ff218
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_nonwritable.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_restrict.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_restrict.spv32
new file mode 100644
index 0000000..6d57eba
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_restrict.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_restrict.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_restrict.spv64
new file mode 100644
index 0000000..a5389e4
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_restrict.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_double_long.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_double_long.spv32
new file mode 100644
index 0000000..6574790
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_double_long.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_double_long.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_double_long.spv64
new file mode 100644
index 0000000..b6d6f06
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_double_long.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_float_int.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_float_int.spv32
new file mode 100644
index 0000000..35bd4cf
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_float_int.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_float_int.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_float_int.spv64
new file mode 100644
index 0000000..86094bf
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rte_float_int.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_double_long.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_double_long.spv32
new file mode 100644
index 0000000..b4d9a42
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_double_long.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_double_long.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_double_long.spv64
new file mode 100644
index 0000000..4ee9b03
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_double_long.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_float_int.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_float_int.spv32
new file mode 100644
index 0000000..2c64302
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_float_int.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_float_int.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_float_int.spv64
new file mode 100644
index 0000000..ff5e2b5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtn_float_int.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_double_long.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_double_long.spv32
new file mode 100644
index 0000000..3c8a9f6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_double_long.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_double_long.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_double_long.spv64
new file mode 100644
index 0000000..3e8ad6e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_double_long.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_float_int.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_float_int.spv32
new file mode 100644
index 0000000..a78de1c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_float_int.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_float_int.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_float_int.spv64
new file mode 100644
index 0000000..f836c09
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtp_float_int.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_double_long.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_double_long.spv32
new file mode 100644
index 0000000..cf6a0f7
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_double_long.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_double_long.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_double_long.spv64
new file mode 100644
index 0000000..407595c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_double_long.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_float_int.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_float_int.spv32
new file mode 100644
index 0000000..315cbb5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_float_int.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_float_int.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_float_int.spv64
new file mode 100644
index 0000000..5c44027
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_rounding_rtz_float_int.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_char.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_char.spv32
new file mode 100644
index 0000000..66ea845
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_char.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_char.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_char.spv64
new file mode 100644
index 0000000..ec87755
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_char.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_int.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_int.spv32
new file mode 100644
index 0000000..949b331
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_int.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_int.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_int.spv64
new file mode 100644
index 0000000..6cb8ffc
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_int.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_short.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_short.spv32
new file mode 100644
index 0000000..4786da6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_short.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_short.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_short.spv64
new file mode 100644
index 0000000..6b76d27
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_short.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uchar.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uchar.spv32
new file mode 100644
index 0000000..589334c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uchar.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uchar.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uchar.spv64
new file mode 100644
index 0000000..31b1082
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uchar.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uint.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uint.spv32
new file mode 100644
index 0000000..c83a913
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uint.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uint.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uint.spv64
new file mode 100644
index 0000000..0b66792
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_uint.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_ushort.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_ushort.spv32
new file mode 100644
index 0000000..405f98d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_ushort.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_ushort.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_ushort.spv64
new file mode 100644
index 0000000..57b1878
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_saturated_conversion_ushort.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_volatile.spv32 b/test_conformance/spirv_new/spirv_bin/decorate_volatile.spv32
new file mode 100644
index 0000000..5ca6abc
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_volatile.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/decorate_volatile.spv64 b/test_conformance/spirv_new/spirv_bin/decorate_volatile.spv64
new file mode 100644
index 0000000..75584f4
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/decorate_volatile.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spv32
new file mode 100644
index 0000000..403d239
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spv64
new file mode 100644
index 0000000..261d7da
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spv32
new file mode 100644
index 0000000..a0ad64d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spv64
new file mode 100644
index 0000000..8c88c5b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spv32
new file mode 100644
index 0000000..3450f6d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spv64
new file mode 100644
index 0000000..a201855
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spv32
new file mode 100644
index 0000000..a3bff0a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spv64
new file mode 100644
index 0000000..8321f37
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spv32
new file mode 100644
index 0000000..58ab454
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spv64
new file mode 100644
index 0000000..25cba7f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spv32
new file mode 100644
index 0000000..be274a7
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spv64
new file mode 100644
index 0000000..7e00ac8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spv32
new file mode 100644
index 0000000..5342bb8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spv64
new file mode 100644
index 0000000..ac03353
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spv32
new file mode 100644
index 0000000..8427279
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spv64
new file mode 100644
index 0000000..9706e0c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spv32 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spv32
new file mode 100644
index 0000000..9ca74db
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spv64 b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spv64
new file mode 100644
index 0000000..5a9716a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fadd_double.spv32 b/test_conformance/spirv_new/spirv_bin/fadd_double.spv32
new file mode 100644
index 0000000..84fad15
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_double.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fadd_double.spv64 b/test_conformance/spirv_new/spirv_bin/fadd_double.spv64
new file mode 100644
index 0000000..f453a28
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_double.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fadd_double2.spv32 b/test_conformance/spirv_new/spirv_bin/fadd_double2.spv32
new file mode 100644
index 0000000..724c71d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_double2.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fadd_double2.spv64 b/test_conformance/spirv_new/spirv_bin/fadd_double2.spv64
new file mode 100644
index 0000000..8bb4fe1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_double2.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fadd_float.spv32 b/test_conformance/spirv_new/spirv_bin/fadd_float.spv32
new file mode 100644
index 0000000..238aeee
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_float.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fadd_float.spv64 b/test_conformance/spirv_new/spirv_bin/fadd_float.spv64
new file mode 100644
index 0000000..de28855
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_float.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fadd_float4.spv32 b/test_conformance/spirv_new/spirv_bin/fadd_float4.spv32
new file mode 100644
index 0000000..880f2d9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_float4.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fadd_float4.spv64 b/test_conformance/spirv_new/spirv_bin/fadd_float4.spv64
new file mode 100644
index 0000000..652b8f4
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_float4.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fadd_half.spv32 b/test_conformance/spirv_new/spirv_bin/fadd_half.spv32
new file mode 100644
index 0000000..e029432
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_half.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fadd_half.spv64 b/test_conformance/spirv_new/spirv_bin/fadd_half.spv64
new file mode 100644
index 0000000..ce54ba5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fadd_half.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_double.spv32 b/test_conformance/spirv_new/spirv_bin/fdiv_double.spv32
new file mode 100644
index 0000000..4249cea
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_double.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_double.spv64 b/test_conformance/spirv_new/spirv_bin/fdiv_double.spv64
new file mode 100644
index 0000000..47f39e8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_double.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_double2.spv32 b/test_conformance/spirv_new/spirv_bin/fdiv_double2.spv32
new file mode 100644
index 0000000..88ef457
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_double2.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_double2.spv64 b/test_conformance/spirv_new/spirv_bin/fdiv_double2.spv64
new file mode 100644
index 0000000..6d75316
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_double2.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_float.spv32 b/test_conformance/spirv_new/spirv_bin/fdiv_float.spv32
new file mode 100644
index 0000000..3278dea
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_float.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_float.spv64 b/test_conformance/spirv_new/spirv_bin/fdiv_float.spv64
new file mode 100644
index 0000000..687048c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_float.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_float4.spv32 b/test_conformance/spirv_new/spirv_bin/fdiv_float4.spv32
new file mode 100644
index 0000000..24b267e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_float4.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_float4.spv64 b/test_conformance/spirv_new/spirv_bin/fdiv_float4.spv64
new file mode 100644
index 0000000..5cd84ed
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_float4.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_half.spv32 b/test_conformance/spirv_new/spirv_bin/fdiv_half.spv32
new file mode 100644
index 0000000..638c44a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_half.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fdiv_half.spv64 b/test_conformance/spirv_new/spirv_bin/fdiv_half.spv64
new file mode 100644
index 0000000..c7a1bff
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fdiv_half.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmod_double.spv32 b/test_conformance/spirv_new/spirv_bin/fmod_double.spv32
new file mode 100644
index 0000000..80ebe51
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_double.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmod_double.spv64 b/test_conformance/spirv_new/spirv_bin/fmod_double.spv64
new file mode 100644
index 0000000..a9dc49f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_double.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmod_double2.spv32 b/test_conformance/spirv_new/spirv_bin/fmod_double2.spv32
new file mode 100644
index 0000000..f598faf
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_double2.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmod_double2.spv64 b/test_conformance/spirv_new/spirv_bin/fmod_double2.spv64
new file mode 100644
index 0000000..eadd41e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_double2.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmod_float.spv32 b/test_conformance/spirv_new/spirv_bin/fmod_float.spv32
new file mode 100644
index 0000000..354087f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_float.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmod_float.spv64 b/test_conformance/spirv_new/spirv_bin/fmod_float.spv64
new file mode 100644
index 0000000..20a0c70
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_float.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmod_float4.spv32 b/test_conformance/spirv_new/spirv_bin/fmod_float4.spv32
new file mode 100644
index 0000000..d6a853c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_float4.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmod_float4.spv64 b/test_conformance/spirv_new/spirv_bin/fmod_float4.spv64
new file mode 100644
index 0000000..1616a10
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_float4.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmod_half.spv32 b/test_conformance/spirv_new/spirv_bin/fmod_half.spv32
new file mode 100644
index 0000000..5fac82e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_half.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmod_half.spv64 b/test_conformance/spirv_new/spirv_bin/fmod_half.spv64
new file mode 100644
index 0000000..3a411a3
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmod_half.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmul_double.spv32 b/test_conformance/spirv_new/spirv_bin/fmul_double.spv32
new file mode 100644
index 0000000..8018033
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_double.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmul_double.spv64 b/test_conformance/spirv_new/spirv_bin/fmul_double.spv64
new file mode 100644
index 0000000..e7e9807
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_double.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmul_double2.spv32 b/test_conformance/spirv_new/spirv_bin/fmul_double2.spv32
new file mode 100644
index 0000000..4057b94
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_double2.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmul_double2.spv64 b/test_conformance/spirv_new/spirv_bin/fmul_double2.spv64
new file mode 100644
index 0000000..b308270
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_double2.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmul_float.spv32 b/test_conformance/spirv_new/spirv_bin/fmul_float.spv32
new file mode 100644
index 0000000..96d2a18
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_float.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmul_float.spv64 b/test_conformance/spirv_new/spirv_bin/fmul_float.spv64
new file mode 100644
index 0000000..a160a8e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_float.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmul_float4.spv32 b/test_conformance/spirv_new/spirv_bin/fmul_float4.spv32
new file mode 100644
index 0000000..5856ef1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_float4.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmul_float4.spv64 b/test_conformance/spirv_new/spirv_bin/fmul_float4.spv64
new file mode 100644
index 0000000..b3c22bd
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_float4.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmul_half.spv32 b/test_conformance/spirv_new/spirv_bin/fmul_half.spv32
new file mode 100644
index 0000000..f9eff00
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_half.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fmul_half.spv64 b/test_conformance/spirv_new/spirv_bin/fmul_half.spv64
new file mode 100644
index 0000000..467674e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fmul_half.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/frem_double.spv32 b/test_conformance/spirv_new/spirv_bin/frem_double.spv32
new file mode 100644
index 0000000..887e7ec
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_double.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/frem_double.spv64 b/test_conformance/spirv_new/spirv_bin/frem_double.spv64
new file mode 100644
index 0000000..b11c4e5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_double.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/frem_double2.spv32 b/test_conformance/spirv_new/spirv_bin/frem_double2.spv32
new file mode 100644
index 0000000..15a7192
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_double2.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/frem_double2.spv64 b/test_conformance/spirv_new/spirv_bin/frem_double2.spv64
new file mode 100644
index 0000000..13ffb49
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_double2.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/frem_float.spv32 b/test_conformance/spirv_new/spirv_bin/frem_float.spv32
new file mode 100644
index 0000000..7f8e5d5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_float.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/frem_float.spv64 b/test_conformance/spirv_new/spirv_bin/frem_float.spv64
new file mode 100644
index 0000000..214541c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_float.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/frem_float4.spv32 b/test_conformance/spirv_new/spirv_bin/frem_float4.spv32
new file mode 100644
index 0000000..e2f6610
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_float4.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/frem_float4.spv64 b/test_conformance/spirv_new/spirv_bin/frem_float4.spv64
new file mode 100644
index 0000000..4ae62aa
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_float4.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/frem_half.spv32 b/test_conformance/spirv_new/spirv_bin/frem_half.spv32
new file mode 100644
index 0000000..58da29f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_half.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/frem_half.spv64 b/test_conformance/spirv_new/spirv_bin/frem_half.spv64
new file mode 100644
index 0000000..3632752
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/frem_half.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fsub_double.spv32 b/test_conformance/spirv_new/spirv_bin/fsub_double.spv32
new file mode 100644
index 0000000..8643553
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_double.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fsub_double.spv64 b/test_conformance/spirv_new/spirv_bin/fsub_double.spv64
new file mode 100644
index 0000000..63bef82
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_double.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fsub_double2.spv32 b/test_conformance/spirv_new/spirv_bin/fsub_double2.spv32
new file mode 100644
index 0000000..11dbd58
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_double2.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fsub_double2.spv64 b/test_conformance/spirv_new/spirv_bin/fsub_double2.spv64
new file mode 100644
index 0000000..22c89d8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_double2.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fsub_float.spv32 b/test_conformance/spirv_new/spirv_bin/fsub_float.spv32
new file mode 100644
index 0000000..aadc29d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_float.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fsub_float.spv64 b/test_conformance/spirv_new/spirv_bin/fsub_float.spv64
new file mode 100644
index 0000000..e8bbec1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_float.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fsub_float4.spv32 b/test_conformance/spirv_new/spirv_bin/fsub_float4.spv32
new file mode 100644
index 0000000..cb97b5c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_float4.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fsub_float4.spv64 b/test_conformance/spirv_new/spirv_bin/fsub_float4.spv64
new file mode 100644
index 0000000..4b2b8d2
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_float4.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fsub_half.spv32 b/test_conformance/spirv_new/spirv_bin/fsub_half.spv32
new file mode 100644
index 0000000..4f7e4e9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_half.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/fsub_half.spv64 b/test_conformance/spirv_new/spirv_bin/fsub_half.spv64
new file mode 100644
index 0000000..f5ccc29
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/fsub_half.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/label_simple.spv32 b/test_conformance/spirv_new/spirv_bin/label_simple.spv32
new file mode 100644
index 0000000..338a821
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/label_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/label_simple.spv64 b/test_conformance/spirv_new/spirv_bin/label_simple.spv64
new file mode 100644
index 0000000..66b4bfc
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/label_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/lifetime_simple.spv32 b/test_conformance/spirv_new/spirv_bin/lifetime_simple.spv32
new file mode 100644
index 0000000..75a8ef6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/lifetime_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/lifetime_simple.spv64 b/test_conformance/spirv_new/spirv_bin/lifetime_simple.spv64
new file mode 100644
index 0000000..34c873d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/lifetime_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/linkage_export.spv32 b/test_conformance/spirv_new/spirv_bin/linkage_export.spv32
new file mode 100644
index 0000000..280021b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/linkage_export.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/linkage_export.spv64 b/test_conformance/spirv_new/spirv_bin/linkage_export.spv64
new file mode 100644
index 0000000..fefdc26
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/linkage_export.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/linkage_import.spv32 b/test_conformance/spirv_new/spirv_bin/linkage_import.spv32
new file mode 100644
index 0000000..e8ae0f3
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/linkage_import.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/linkage_import.spv64 b/test_conformance/spirv_new/spirv_bin/linkage_import.spv64
new file mode 100644
index 0000000..9aa4876
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/linkage_import.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_dont_unroll.spv32 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_dont_unroll.spv32
new file mode 100644
index 0000000..96aa742
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_dont_unroll.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_dont_unroll.spv64 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_dont_unroll.spv64
new file mode 100644
index 0000000..0b4ebd3
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_dont_unroll.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_none.spv32 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_none.spv32
new file mode 100644
index 0000000..f1c00ac
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_none.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_none.spv64 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_none.spv64
new file mode 100644
index 0000000..7490dd6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_none.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_unroll.spv32 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_unroll.spv32
new file mode 100644
index 0000000..8571f8e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_unroll.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_unroll.spv64 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_unroll.spv64
new file mode 100644
index 0000000..21a6a99
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_conditional_unroll.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_dont_unroll.spv32 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_dont_unroll.spv32
new file mode 100644
index 0000000..fbc355f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_dont_unroll.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_dont_unroll.spv64 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_dont_unroll.spv64
new file mode 100644
index 0000000..c345403
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_dont_unroll.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_none.spv32 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_none.spv32
new file mode 100644
index 0000000..a00b862
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_none.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_none.spv64 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_none.spv64
new file mode 100644
index 0000000..3a4787f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_none.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_unroll.spv32 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_unroll.spv32
new file mode 100644
index 0000000..305a0c7
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_unroll.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/loop_merge_branch_unroll.spv64 b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_unroll.spv64
new file mode 100644
index 0000000..af93d9b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/loop_merge_branch_unroll.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_function_const.spv32 b/test_conformance/spirv_new/spirv_bin/op_function_const.spv32
new file mode 100644
index 0000000..c371d5d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_const.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_function_const.spv64 b/test_conformance/spirv_new/spirv_bin/op_function_const.spv64
new file mode 100644
index 0000000..07df8ab
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_const.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_function_inline.spv32 b/test_conformance/spirv_new/spirv_bin/op_function_inline.spv32
new file mode 100644
index 0000000..3def2ee
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_inline.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_function_inline.spv64 b/test_conformance/spirv_new/spirv_bin/op_function_inline.spv64
new file mode 100644
index 0000000..872e4ef
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_inline.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_function_noinline.spv32 b/test_conformance/spirv_new/spirv_bin/op_function_noinline.spv32
new file mode 100644
index 0000000..3ac4c24
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_noinline.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_function_noinline.spv64 b/test_conformance/spirv_new/spirv_bin/op_function_noinline.spv64
new file mode 100644
index 0000000..c9fbf13
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_noinline.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_function_none.spv32 b/test_conformance/spirv_new/spirv_bin/op_function_none.spv32
new file mode 100644
index 0000000..027522f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_none.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_function_none.spv64 b/test_conformance/spirv_new/spirv_bin/op_function_none.spv64
new file mode 100644
index 0000000..5411a36
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_none.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_function_pure.spv32 b/test_conformance/spirv_new/spirv_bin/op_function_pure.spv32
new file mode 100644
index 0000000..64a261e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_pure.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_function_pure.spv64 b/test_conformance/spirv_new/spirv_bin/op_function_pure.spv64
new file mode 100644
index 0000000..d03a1b7
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_pure.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_function_pure_ptr.spv32 b/test_conformance/spirv_new/spirv_bin/op_function_pure_ptr.spv32
new file mode 100644
index 0000000..acdec9d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_pure_ptr.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_function_pure_ptr.spv64 b/test_conformance/spirv_new/spirv_bin/op_function_pure_ptr.spv64
new file mode 100644
index 0000000..34e495b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_function_pure_ptr.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_double.spv32 b/test_conformance/spirv_new/spirv_bin/op_neg_double.spv32
new file mode 100644
index 0000000..005d38c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_double.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_double.spv64 b/test_conformance/spirv_new/spirv_bin/op_neg_double.spv64
new file mode 100644
index 0000000..71e7241
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_double.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_float.spv32 b/test_conformance/spirv_new/spirv_bin/op_neg_float.spv32
new file mode 100644
index 0000000..7a89c8d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_float.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_float.spv64 b/test_conformance/spirv_new/spirv_bin/op_neg_float.spv64
new file mode 100644
index 0000000..c76a39e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_float.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_float4.spv32 b/test_conformance/spirv_new/spirv_bin/op_neg_float4.spv32
new file mode 100644
index 0000000..f1a88fa
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_float4.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_float4.spv64 b/test_conformance/spirv_new/spirv_bin/op_neg_float4.spv64
new file mode 100644
index 0000000..3d306ea
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_float4.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_int.spv32 b/test_conformance/spirv_new/spirv_bin/op_neg_int.spv32
new file mode 100644
index 0000000..7fcd069
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_int.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_int.spv64 b/test_conformance/spirv_new/spirv_bin/op_neg_int.spv64
new file mode 100644
index 0000000..7ad8a10
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_int.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_int4.spv32 b/test_conformance/spirv_new/spirv_bin/op_neg_int4.spv32
new file mode 100644
index 0000000..4841414
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_int4.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_int4.spv64 b/test_conformance/spirv_new/spirv_bin/op_neg_int4.spv64
new file mode 100644
index 0000000..1fc4696
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_int4.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_long.spv32 b/test_conformance/spirv_new/spirv_bin/op_neg_long.spv32
new file mode 100644
index 0000000..b4f7fe6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_long.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_long.spv64 b/test_conformance/spirv_new/spirv_bin/op_neg_long.spv64
new file mode 100644
index 0000000..c0db13e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_long.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_short.spv32 b/test_conformance/spirv_new/spirv_bin/op_neg_short.spv32
new file mode 100644
index 0000000..ddc874b
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_short.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_neg_short.spv64 b/test_conformance/spirv_new/spirv_bin/op_neg_short.spv64
new file mode 100644
index 0000000..3e29164
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_neg_short.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_not_int.spv32 b/test_conformance/spirv_new/spirv_bin/op_not_int.spv32
new file mode 100644
index 0000000..2e6f997
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_int.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_not_int.spv64 b/test_conformance/spirv_new/spirv_bin/op_not_int.spv64
new file mode 100644
index 0000000..3c6f6d8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_int.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_not_int4.spv32 b/test_conformance/spirv_new/spirv_bin/op_not_int4.spv32
new file mode 100644
index 0000000..bbc1b56
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_int4.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_not_int4.spv64 b/test_conformance/spirv_new/spirv_bin/op_not_int4.spv64
new file mode 100644
index 0000000..a40c025
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_int4.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_not_long.spv32 b/test_conformance/spirv_new/spirv_bin/op_not_long.spv32
new file mode 100644
index 0000000..b00d205
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_long.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_not_long.spv64 b/test_conformance/spirv_new/spirv_bin/op_not_long.spv64
new file mode 100644
index 0000000..13719ed
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_long.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_not_short.spv32 b/test_conformance/spirv_new/spirv_bin/op_not_short.spv32
new file mode 100644
index 0000000..fa1a88d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_short.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/op_not_short.spv64 b/test_conformance/spirv_new/spirv_bin/op_not_short.spv64
new file mode 100644
index 0000000..648f92f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/op_not_short.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/opaque.spv32 b/test_conformance/spirv_new/spirv_bin/opaque.spv32
new file mode 100644
index 0000000..3400d98
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/opaque.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/opaque.spv64 b/test_conformance/spirv_new/spirv_bin/opaque.spv64
new file mode 100644
index 0000000..8383e93
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/opaque.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/phi_2.spv32 b/test_conformance/spirv_new/spirv_bin/phi_2.spv32
new file mode 100644
index 0000000..22ae786
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/phi_2.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/phi_2.spv64 b/test_conformance/spirv_new/spirv_bin/phi_2.spv64
new file mode 100644
index 0000000..4ef16fb
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/phi_2.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/phi_3.spv32 b/test_conformance/spirv_new/spirv_bin/phi_3.spv32
new file mode 100644
index 0000000..de55f63
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/phi_3.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/phi_3.spv64 b/test_conformance/spirv_new/spirv_bin/phi_3.spv64
new file mode 100644
index 0000000..383b364
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/phi_3.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/phi_4.spv32 b/test_conformance/spirv_new/spirv_bin/phi_4.spv32
new file mode 100644
index 0000000..45e74e0
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/phi_4.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/phi_4.spv64 b/test_conformance/spirv_new/spirv_bin/phi_4.spv64
new file mode 100644
index 0000000..f5fc763
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/phi_4.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/select_if_dont_flatten.spv32 b/test_conformance/spirv_new/spirv_bin/select_if_dont_flatten.spv32
new file mode 100644
index 0000000..e454917
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_if_dont_flatten.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/select_if_dont_flatten.spv64 b/test_conformance/spirv_new/spirv_bin/select_if_dont_flatten.spv64
new file mode 100644
index 0000000..913b327
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_if_dont_flatten.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/select_if_flatten.spv32 b/test_conformance/spirv_new/spirv_bin/select_if_flatten.spv32
new file mode 100644
index 0000000..5f42dcd
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_if_flatten.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/select_if_flatten.spv64 b/test_conformance/spirv_new/spirv_bin/select_if_flatten.spv64
new file mode 100644
index 0000000..f266d21
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_if_flatten.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/select_if_none.spv32 b/test_conformance/spirv_new/spirv_bin/select_if_none.spv32
new file mode 100644
index 0000000..be9ffb6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_if_none.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/select_if_none.spv64 b/test_conformance/spirv_new/spirv_bin/select_if_none.spv64
new file mode 100644
index 0000000..3719f76
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_if_none.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/select_switch_dont_flatten.spv32 b/test_conformance/spirv_new/spirv_bin/select_switch_dont_flatten.spv32
new file mode 100644
index 0000000..5caee39
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_switch_dont_flatten.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/select_switch_dont_flatten.spv64 b/test_conformance/spirv_new/spirv_bin/select_switch_dont_flatten.spv64
new file mode 100644
index 0000000..937cd4d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_switch_dont_flatten.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/select_switch_flatten.spv32 b/test_conformance/spirv_new/spirv_bin/select_switch_flatten.spv32
new file mode 100644
index 0000000..c8ed6df
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_switch_flatten.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/select_switch_flatten.spv64 b/test_conformance/spirv_new/spirv_bin/select_switch_flatten.spv64
new file mode 100644
index 0000000..579681d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_switch_flatten.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/select_switch_none.spv32 b/test_conformance/spirv_new/spirv_bin/select_switch_none.spv32
new file mode 100644
index 0000000..e92f494
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_switch_none.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/select_switch_none.spv64 b/test_conformance/spirv_new/spirv_bin/select_switch_none.spv64
new file mode 100644
index 0000000..385281a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/select_switch_none.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_char_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_char_simple.spv32
new file mode 100644
index 0000000..aaa7d8d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_char_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_char_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_char_simple.spv64
new file mode 100644
index 0000000..73318ea
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_char_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_double_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_double_simple.spv32
new file mode 100644
index 0000000..f527471
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_double_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_double_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_double_simple.spv64
new file mode 100644
index 0000000..8260c99
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_double_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_false_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_false_simple.spv32
new file mode 100644
index 0000000..1b56ccd
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_false_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_false_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_false_simple.spv64
new file mode 100644
index 0000000..5b3c33d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_false_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_float_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_float_simple.spv32
new file mode 100644
index 0000000..fe1365a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_float_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_float_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_float_simple.spv64
new file mode 100644
index 0000000..d3a78f1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_float_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_half_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_half_simple.spv32
new file mode 100644
index 0000000..947fdc9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_half_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_half_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_half_simple.spv64
new file mode 100644
index 0000000..ef6473f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_half_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_int3_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_int3_simple.spv32
new file mode 100644
index 0000000..73ae0c2
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_int3_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_int3_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_int3_simple.spv64
new file mode 100644
index 0000000..38e6b23
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_int3_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_int4_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_int4_simple.spv32
new file mode 100644
index 0000000..a4fac3e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_int4_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_int4_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_int4_simple.spv64
new file mode 100644
index 0000000..92c785a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_int4_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_int_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_int_simple.spv32
new file mode 100644
index 0000000..244aa7e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_int_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_int_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_int_simple.spv64
new file mode 100644
index 0000000..ee152ea
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_int_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_long_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_long_simple.spv32
new file mode 100644
index 0000000..6c304c6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_long_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_long_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_long_simple.spv64
new file mode 100644
index 0000000..fe0bae6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_long_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_short_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_short_simple.spv32
new file mode 100644
index 0000000..c55c7ed
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_short_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_short_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_short_simple.spv64
new file mode 100644
index 0000000..9aa240c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_short_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_struct_int_char_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_struct_int_char_simple.spv32
new file mode 100644
index 0000000..97a0252
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_struct_int_char_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_struct_int_char_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_struct_int_char_simple.spv64
new file mode 100644
index 0000000..092cee0
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_struct_int_char_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_struct_int_float_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_struct_int_float_simple.spv32
new file mode 100644
index 0000000..912e9e8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_struct_int_float_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_struct_int_float_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_struct_int_float_simple.spv64
new file mode 100644
index 0000000..a1ef3fc
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_struct_int_float_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_struct_struct_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_struct_struct_simple.spv32
new file mode 100644
index 0000000..8d8fe39
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_struct_struct_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_struct_struct_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_struct_struct_simple.spv64
new file mode 100644
index 0000000..8030440
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_struct_struct_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_true_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_true_simple.spv32
new file mode 100644
index 0000000..08b9033
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_true_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_true_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_true_simple.spv64
new file mode 100644
index 0000000..41c2bef
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_true_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_uchar_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_uchar_simple.spv32
new file mode 100644
index 0000000..e3b3074
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_uchar_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_uchar_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_uchar_simple.spv64
new file mode 100644
index 0000000..929d352
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_uchar_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_uint_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_uint_simple.spv32
new file mode 100644
index 0000000..0073745
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_uint_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_uint_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_uint_simple.spv64
new file mode 100644
index 0000000..6e58cef
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_uint_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_ulong_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_ulong_simple.spv32
new file mode 100644
index 0000000..4a51c3c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_ulong_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_ulong_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_ulong_simple.spv64
new file mode 100644
index 0000000..4651d73
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_ulong_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_ushort_simple.spv32 b/test_conformance/spirv_new/spirv_bin/undef_ushort_simple.spv32
new file mode 100644
index 0000000..6030982
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_ushort_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/undef_ushort_simple.spv64 b/test_conformance/spirv_new/spirv_bin/undef_ushort_simple.spv64
new file mode 100644
index 0000000..c348a95
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/undef_ushort_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/unreachable_simple.spv32 b/test_conformance/spirv_new/spirv_bin/unreachable_simple.spv32
new file mode 100644
index 0000000..0d4f238
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/unreachable_simple.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/unreachable_simple.spv64 b/test_conformance/spirv_new/spirv_bin/unreachable_simple.spv64
new file mode 100644
index 0000000..70596a0
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/unreachable_simple.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_char16_extract.spv32 b/test_conformance/spirv_new/spirv_bin/vector_char16_extract.spv32
new file mode 100644
index 0000000..6acf7ee
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_char16_extract.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_char16_extract.spv64 b/test_conformance/spirv_new/spirv_bin/vector_char16_extract.spv64
new file mode 100644
index 0000000..65dde38
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_char16_extract.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_char16_insert.spv32 b/test_conformance/spirv_new/spirv_bin/vector_char16_insert.spv32
new file mode 100644
index 0000000..074e2e5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_char16_insert.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_char16_insert.spv64 b/test_conformance/spirv_new/spirv_bin/vector_char16_insert.spv64
new file mode 100644
index 0000000..3e12740
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_char16_insert.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_double2_extract.spv32 b/test_conformance/spirv_new/spirv_bin/vector_double2_extract.spv32
new file mode 100644
index 0000000..19eb940
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_double2_extract.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_double2_extract.spv64 b/test_conformance/spirv_new/spirv_bin/vector_double2_extract.spv64
new file mode 100644
index 0000000..f7cf8a7
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_double2_extract.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_double2_insert.spv32 b/test_conformance/spirv_new/spirv_bin/vector_double2_insert.spv32
new file mode 100644
index 0000000..02601e5
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_double2_insert.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_double2_insert.spv64 b/test_conformance/spirv_new/spirv_bin/vector_double2_insert.spv64
new file mode 100644
index 0000000..1dd7e63
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_double2_insert.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_float4_extract.spv32 b/test_conformance/spirv_new/spirv_bin/vector_float4_extract.spv32
new file mode 100644
index 0000000..a1c4888
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_float4_extract.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_float4_extract.spv64 b/test_conformance/spirv_new/spirv_bin/vector_float4_extract.spv64
new file mode 100644
index 0000000..16d0194
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_float4_extract.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_float4_insert.spv32 b/test_conformance/spirv_new/spirv_bin/vector_float4_insert.spv32
new file mode 100644
index 0000000..f6bba1e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_float4_insert.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_float4_insert.spv64 b/test_conformance/spirv_new/spirv_bin/vector_float4_insert.spv64
new file mode 100644
index 0000000..0d84bd6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_float4_insert.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_int4_extract.spv32 b/test_conformance/spirv_new/spirv_bin/vector_int4_extract.spv32
new file mode 100644
index 0000000..50d5dd4
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_int4_extract.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_int4_extract.spv64 b/test_conformance/spirv_new/spirv_bin/vector_int4_extract.spv64
new file mode 100644
index 0000000..5b6b0c1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_int4_extract.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_int4_insert.spv32 b/test_conformance/spirv_new/spirv_bin/vector_int4_insert.spv32
new file mode 100644
index 0000000..006ec9a
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_int4_insert.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_int4_insert.spv64 b/test_conformance/spirv_new/spirv_bin/vector_int4_insert.spv64
new file mode 100644
index 0000000..47996ed
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_int4_insert.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_long2_extract.spv32 b/test_conformance/spirv_new/spirv_bin/vector_long2_extract.spv32
new file mode 100644
index 0000000..229e3b6
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_long2_extract.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_long2_extract.spv64 b/test_conformance/spirv_new/spirv_bin/vector_long2_extract.spv64
new file mode 100644
index 0000000..34d2517
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_long2_extract.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_long2_insert.spv32 b/test_conformance/spirv_new/spirv_bin/vector_long2_insert.spv32
new file mode 100644
index 0000000..e62cef9
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_long2_insert.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_long2_insert.spv64 b/test_conformance/spirv_new/spirv_bin/vector_long2_insert.spv64
new file mode 100644
index 0000000..2eb8833
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_long2_insert.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_times_scalar_double.spv32 b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_double.spv32
new file mode 100644
index 0000000..8e672dd
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_double.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_times_scalar_double.spv64 b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_double.spv64
new file mode 100644
index 0000000..21e4dc4
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_double.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_times_scalar_float.spv32 b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_float.spv32
new file mode 100644
index 0000000..2c4d9c3
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_float.spv32
Binary files differ
diff --git a/test_conformance/spirv_new/spirv_bin/vector_times_scalar_float.spv64 b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_float.spv64
new file mode 100644
index 0000000..94dff07
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_bin/vector_times_scalar_float.spv64
Binary files differ
diff --git a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp
index 9e1789c..84f8ed1 100644
--- a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp
+++ b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp
@@ -129,9 +129,13 @@

         // Run the cl kernel for reference results

         clProgramWrapper prog;

-        clKernelWrapper kernel;

-        err = create_single_kernel_helper(context, &prog, &kernel, 1,

-                                          &kernelBuf, "fmath_cl");

+        err = create_single_kernel_helper_create_program(context, &prog, 1, &kernelBuf, NULL);

+        SPIRV_CHECK_ERROR(err, "Failed to create cl program");


+        err = clBuildProgram(prog, 1, &deviceID, NULL, NULL, NULL);

+        SPIRV_CHECK_ERROR(err, "Failed to build program");


+        clKernelWrapper kernel = clCreateKernel(prog, "fmath_cl", &err);

         SPIRV_CHECK_ERROR(err, "Failed to create cl kernel");


         clMemWrapper ref = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err);

diff --git a/test_conformance/spirv_new/test_decorate.cpp b/test_conformance/spirv_new/test_decorate.cpp
index ccd7431..766a6b6 100644
--- a/test_conformance/spirv_new/test_decorate.cpp
+++ b/test_conformance/spirv_new/test_decorate.cpp
@@ -310,6 +310,174 @@
 TEST_SATURATED_CONVERSION(double, ulong, uint)
+int test_image_decorate(cl_device_id deviceID,
+                        cl_context context,
+                        cl_command_queue queue,
+                        const char *name)
+    const int width = 4096;
+    const int height = 4096;
+    std::vector<cl_uint4> src(width * height);
+    RandomSeed seed(gRandomSeed);
+    for (auto &val : src) {
+        val = genrand<cl_uint4>(seed);
+    }
+    cl_image_format imageFormat;
+    imageFormat.image_channel_data_type = CL_UNSIGNED_INT32;
+    imageFormat.image_channel_order = CL_RGBA;
+    cl_image_desc desc;
+    desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+    desc.image_width = width;
+    desc.image_height = height;
+    desc.image_depth = 0;
+    desc.image_array_size = 0;
+    desc.image_row_pitch = 0; // Automatically calculate the pitch
+    desc.image_slice_pitch = 0; // Not needed for 2D
+    desc.num_mip_levels = 0;
+    desc.num_samples = 0;
+    desc.mem_object = NULL;
+    cl_int err = CL_SUCCESS;
+    clMemWrapper srcImage = clCreateImage(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+                                          &imageFormat, &desc,
+                                          &src[0], &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create image object");
+    size_t bytes = src.size() * sizeof(cl_uint4);
+    clMemWrapper dstBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create image object");
+    clProgramWrapper prog;
+    err = get_program_with_il(prog, deviceID, context, name);
+    SPIRV_CHECK_ERROR(err, "Failed to build program");
+    clKernelWrapper kernel = clCreateKernel(prog, name, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create spv kernel");
+    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dstBuffer);
+    SPIRV_CHECK_ERROR(err, "Failed to set arg 2 of the kernel");
+    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &srcImage);
+    SPIRV_CHECK_ERROR(err, "Failed to set arg 1 of the kernel");
+    size_t global[] = {width, height};
+    err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, NULL, 0, NULL, NULL);
+    SPIRV_CHECK_ERROR(err, "Failed to enqueue kernel");
+    std::vector<cl_uint4> dst(src.size());
+    err = clEnqueueReadBuffer(queue, dstBuffer, CL_TRUE, 0, bytes, &dst[0], 0, NULL, NULL);
+    SPIRV_CHECK_ERROR(err, "Failed to copy data back to host");
+    for (int j = 0; j < height; j++) {
+        for (int i = 0; i < width; i++) {
+            int srcIdx = j * width + i;
+            int dstIdx = i * height + j;
+            if (dst[dstIdx] != src[srcIdx]) {
+                log_error("Values do not match at location (%d, %d) of src\n", i, j);
+            }
+        }
+    }
+    return 0;
+#define TEST_SPIRV_IMAGE_DECORATE(type)                         \
+    TEST_SPIRV_FUNC(decorate_##type)                            \
+    {                                                           \
+        PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID);                \
+        return test_image_decorate(deviceID, context, queue,    \
+                                   "decorate_" #type);          \
+    }                                                           \
+    const char *name = "decorate_nonreadable";
+    const int width = 4096;
+    const int height = 4096;
+    cl_int err = CL_SUCCESS;
+    std::vector<cl_uint4> src(width * height);
+    RandomSeed seed(gRandomSeed);
+    for (auto &val : src) {
+        val = genrand<cl_uint4>(seed);
+    }
+    size_t bytes = src.size() * sizeof(cl_uint4);
+    clMemWrapper srcBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create image object");
+    err = clEnqueueWriteBuffer(queue, srcBuffer, CL_TRUE, 0, bytes, &src[0], 0, NULL, NULL);
+    SPIRV_CHECK_ERROR(err, "Failed to copy data back to host");
+    cl_image_format imageFormat;
+    imageFormat.image_channel_data_type = CL_UNSIGNED_INT32;
+    imageFormat.image_channel_order = CL_RGBA;
+    cl_image_desc desc;
+    desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+    desc.image_width = height;
+    desc.image_height = width;
+    desc.image_depth = 0;
+    desc.image_array_size = 0;
+    desc.image_row_pitch = 0; // Automatically calculate the pitch
+    desc.image_slice_pitch = 0; // Not needed for 2D
+    desc.num_mip_levels = 0;
+    desc.num_samples = 0;
+    desc.mem_object = NULL;
+    clMemWrapper dstImage = clCreateImage(context, CL_MEM_WRITE_ONLY,
+                                          &imageFormat, &desc,
+                                          NULL, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create image object");
+    clProgramWrapper prog;
+    err = get_program_with_il(prog, deviceID, context, name);
+    SPIRV_CHECK_ERROR(err, "Failed to build program");
+    clKernelWrapper kernel = clCreateKernel(prog, name, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create spv kernel");
+    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dstImage);
+    SPIRV_CHECK_ERROR(err, "Failed to set arg 2 of the kernel");
+    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &srcBuffer);
+    SPIRV_CHECK_ERROR(err, "Failed to set arg 1 of the kernel");
+    size_t global[] = {width, height};
+    err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, NULL, 0, NULL, NULL);
+    SPIRV_CHECK_ERROR(err, "Failed to enqueue kernel");
+    std::vector<cl_uint4> dst(src.size());
+    size_t origin[] = {0, 0, 0};
+    size_t region[] = {height, width, 1};
+    err = clEnqueueReadImage(queue, dstImage, CL_TRUE, origin, region, 0, 0, &dst[0], 0, NULL, NULL);
+    for (int j = 0; j < height; j++) {
+        for (int i = 0; i < width; i++) {
+            int srcIdx = j * width + i;
+            int dstIdx = i * height + j;
+            if (dst[dstIdx] != src[srcIdx]) {
+                log_error("Values do not match at location (%d, %d) of src\n", i, j);
+            }
+        }
+    }
+    return 0;
 template<typename Ti, typename To>
 int test_fp_rounding(cl_device_id deviceID,
                      cl_context context,
diff --git a/test_conformance/spirv_new/test_get_program_il.cpp b/test_conformance/spirv_new/test_get_program_il.cpp
deleted file mode 100644
index cf349d1..0000000
--- a/test_conformance/spirv_new/test_get_program_il.cpp
+++ /dev/null
@@ -1,105 +0,0 @@
-Copyright (c) 2020 The Khronos Group Inc. All Rights Reserved.
-This code is protected by copyright laws and contains material proprietary to
-the Khronos Group, Inc. This is UNPUBLISHED PROPRIETARY SOURCE CODE that may not
-be disclosed in whole or in part to third parties, and may not be reproduced,
-republished, distributed, transmitted, displayed, broadcast or otherwise
-exploited in any manner without the express prior written permission of Khronos
-Group. The receipt or possession of this code does not convey any rights to
-reproduce, disclose, or distribute its contents, or to manufacture, use, or sell
-anything that it may describe, in whole or in part other than under the terms of
-the Khronos Adopters Agreement or Khronos Conformance Test Source License
-Agreement as executed between Khronos and the recipient.
-#include "testBase.h"
-const char *sample_kernel_code_single_line[] = {
-    "__kernel void sample_test(__global float *src, __global int *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = (int)src[tid];\n"
-    "\n"
-    "}\n"
-    clProgramWrapper source_program;
-    size_t il_size = -1;
-    int error;
-    /* If a program has been created with clCreateProgramWithIL, CL_PROGRAM_IL
-     * should return the program IL it was created with and it's size */
-    if (gCoreILProgram || is_extension_available(deviceID, "cl_khr_il_program"))
-    {
-        clProgramWrapper il_program;
-        std::string spvStr = "op_function_none";
-        const char *spvName = spvStr.c_str();
-        std::vector<unsigned char> spirv_binary = readSPIRV(spvName);
-        int file_bytes = spirv_binary.size();
-        if (file_bytes == 0)
-        {
-            test_fail("ERROR: SPIRV file %s not found!\n", spvName);
-        }
-        /* Create program with IL */
-        unsigned char *spirv_buffer = &spirv_binary[0];
-        error = get_program_with_il(il_program, deviceID, context, spvName);
-        SPIRV_CHECK_ERROR(error, "Unable to create program with IL.");
-        if (il_program == NULL)
-        {
-            test_fail("ERROR: Unable to create test program!\n");
-        }
-        /* Check program IL is the same as the source IL */
-        unsigned char *buffer = new unsigned char[file_bytes];
-        error = clGetProgramInfo(il_program, CL_PROGRAM_IL, file_bytes, buffer,
-                                 &il_size);
-        SPIRV_CHECK_ERROR(error, "Unable to get program info.");
-        if (il_size != file_bytes)
-        {
-            test_fail("ERROR: Returned IL size is not the same as source IL "
-                      "size (%lu "
-                      "!= %lu)!\n",
-                      il_size, file_bytes);
-        }
-        if (memcmp(buffer, spirv_buffer, file_bytes) != 0)
-        {
-            test_fail("ERROR: Returned IL is not the same as source IL!\n");
-        }
-        delete[] buffer;
-    }
-    /* CL_PROGRAM_IL shouldn't return IL value unless program is created with
-     * clCreateProgramWithIL */
-    error = create_single_kernel_helper_create_program(
-        context, &source_program, 1, sample_kernel_code_single_line);
-    if (source_program == NULL)
-    {
-        test_fail("ERROR: Unable to create test program!\n");
-    }
-    if (gCompilationMode != kSpir_v)
-    {
-        error =
-            clGetProgramInfo(source_program, CL_PROGRAM_IL, 0, NULL, &il_size);
-        SPIRV_CHECK_ERROR(error, "Unable to get program il length");
-        if (il_size != 0)
-        {
-            test_fail(
-                "ERROR: Returned length of non-IL program IL is non-zero!\n");
-        }
-    }
-    return 0;
\ No newline at end of file
diff --git a/test_conformance/spirv_new/test_linkage.cpp b/test_conformance/spirv_new/test_linkage.cpp
index cf518c3..93972c9 100644
--- a/test_conformance/spirv_new/test_linkage.cpp
+++ b/test_conformance/spirv_new/test_linkage.cpp
@@ -33,43 +33,17 @@
     unsigned char *buffer = &buffer_vec[0];
-    if (gCoreILProgram)
-    {
-        prog = clCreateProgramWithIL(context, buffer, file_bytes, &err);
-            err, "Failed to create program with clCreateProgramWithIL");
-    }
-    else
-    {
-        cl_platform_id platform;
-        err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM,
-                              sizeof(cl_platform_id), &platform, NULL);
-        SPIRV_CHECK_ERROR(err,
-                          "Failed to get platform info with clGetDeviceInfo");
-        clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
-        clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
-            clGetExtensionFunctionAddressForPlatform(
-                platform, "clCreateProgramWithILKHR");
-        if (clCreateProgramWithILKHR == NULL)
-        {
-            log_error(
-                "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
-            return -1;
-        }
-        prog = clCreateProgramWithILKHR(context, buffer, file_bytes, &err);
-            err, "Failed to create program with clCreateProgramWithILKHR");
-    }
+    prog = clCreateProgramWithIL(context, buffer, file_bytes, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create program with clCreateProgramWithIL");
     err = clCompileProgram(prog, 1, &deviceID,
                            NULL, // options
-                           0, // num headers
+                           0,    // num headers
                            NULL, // input headers
                            NULL, // header include names
                            NULL, // callback
-                           NULL // User data
-    );
+                           NULL  // User data
+        );
     SPIRV_CHECK_ERROR(err, "Failed to compile spv program");
     return 0;
diff --git a/test_conformance/spirv_new/test_op_fmath.cpp b/test_conformance/spirv_new/test_op_fmath.cpp
index bec0667..7250eb1 100644
--- a/test_conformance/spirv_new/test_op_fmath.cpp
+++ b/test_conformance/spirv_new/test_op_fmath.cpp
@@ -89,9 +89,13 @@
         // Run the cl kernel for reference results
         clProgramWrapper prog;
-        clKernelWrapper kernel;
-        err = create_single_kernel_helper(context, &prog, &kernel, 1,
-                                          &kernelBuf, "fmath_cl");
+        err = create_single_kernel_helper_create_program(context, &prog, 1, &kernelBuf, NULL);
+        SPIRV_CHECK_ERROR(err, "Failed to create cl program");
+        err = clBuildProgram(prog, 1, &deviceID, NULL, NULL, NULL);
+        SPIRV_CHECK_ERROR(err, "Failed to build program");
+        clKernelWrapper kernel = clCreateKernel(prog, "fmath_cl", &err);
         SPIRV_CHECK_ERROR(err, "Failed to create cl kernel");
         clMemWrapper ref = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err);
diff --git a/test_conformance/spirv_new/test_op_opaque.cpp b/test_conformance/spirv_new/test_op_opaque.cpp
index 067d9e4..930e26d 100644
--- a/test_conformance/spirv_new/test_op_opaque.cpp
+++ b/test_conformance/spirv_new/test_op_opaque.cpp
@@ -28,45 +28,17 @@
     unsigned char *buffer = &buffer_vec[0];
-    clProgramWrapper prog;
-    if (gCoreILProgram)
-    {
-        prog = clCreateProgramWithIL(context, buffer, file_bytes, &err);
-            err, "Failed to create program with clCreateProgramWithIL");
-    }
-    else
-    {
-        cl_platform_id platform;
-        err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM,
-                              sizeof(cl_platform_id), &platform, NULL);
-        SPIRV_CHECK_ERROR(err,
-                          "Failed to get platform info with clGetDeviceInfo");
-        clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
-        clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
-            clGetExtensionFunctionAddressForPlatform(
-                platform, "clCreateProgramWithILKHR");
-        if (clCreateProgramWithILKHR == NULL)
-        {
-            log_error(
-                "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
-            return -1;
-        }
-        prog = clCreateProgramWithILKHR(context, buffer, file_bytes, &err);
-            err, "Failed to create program with clCreateProgramWithILKHR");
-    }
+    clProgramWrapper prog = clCreateProgramWithIL(context, buffer, file_bytes, &err);
+    SPIRV_CHECK_ERROR(err, "Failed to create program with clCreateProgramWithIL");
     err = clCompileProgram(prog, 1, &deviceID,
                            NULL, // options
-                           0, // num headers
+                           0,    // num headers
                            NULL, // input headers
                            NULL, // header include names
                            NULL, // callback
-                           NULL // User data
-    );
+                           NULL  // User data
+        );
     SPIRV_CHECK_ERROR(err, "Failed to compile spv program");
     return 0;
diff --git a/test_conformance/spirv_new/test_op_spec_constant.cpp b/test_conformance/spirv_new/test_op_spec_constant.cpp
deleted file mode 100644
index a280a4f..0000000
--- a/test_conformance/spirv_new/test_op_spec_constant.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-Copyright (c) 2020 The Khronos Group Inc. All Rights Reserved.
-This code is protected by copyright laws and contains material proprietary to
-the Khronos Group, Inc. This is UNPUBLISHED PROPRIETARY SOURCE CODE that may not
-be disclosed in whole or in part to third parties, and may not be reproduced,
-republished, distributed, transmitted, displayed, broadcast or otherwise
-exploited in any manner without the express prior written permission of Khronos
-Group. The receipt or possession of this code does not convey any rights to
-reproduce, disclose, or distribute its contents, or to manufacture, use, or sell
-anything that it may describe, in whole or in part other than under the terms of
-the Khronos Adopters Agreement or Khronos Conformance Test Source License
-Agreement as executed between Khronos and the recipient.
-#include "testBase.h"
-#include "types.hpp"
-template <typename T>
-int run_case(cl_device_id deviceID, cl_context context, cl_command_queue queue,
-             const char *name, T init_buffer, T spec_constant_value,
-             T final_value, bool use_spec_constant)
-    clProgramWrapper prog;
-    cl_int err = CL_SUCCESS;
-    if (use_spec_constant)
-    {
-        spec_const new_spec_const =
-            spec_const(101, sizeof(T), &spec_constant_value);
-        err =
-            get_program_with_il(prog, deviceID, context, name, new_spec_const);
-    }
-    else
-    {
-        err = get_program_with_il(prog, deviceID, context, name);
-    }
-    SPIRV_CHECK_ERROR(err, "Failed to build program");
-    clKernelWrapper kernel = clCreateKernel(prog, "spec_const_kernel", &err);
-    SPIRV_CHECK_ERROR(err, "Failed to create kernel");
-    size_t bytes = sizeof(T);
-    clMemWrapper output_buffer =
-        clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, bytes,
-                       &init_buffer, &err);
-    SPIRV_CHECK_ERROR(err, "Failed to create output_buffer");
-    err = clSetKernelArg(kernel, 0, sizeof(clMemWrapper), &output_buffer);
-    SPIRV_CHECK_ERROR(err, "Failed to set kernel argument output_buffer");
-    size_t work_size = 1;
-    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &work_size, NULL, 0,
-                                 NULL, NULL);
-    SPIRV_CHECK_ERROR(err, "Failed to enqueue kernel");
-    T device_results = 0;
-    err = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, bytes,
-                              &device_results, 0, NULL, NULL);
-    SPIRV_CHECK_ERROR(err, "Failed to copy from output_buffer");
-    T reference = 0;
-    use_spec_constant ? reference = final_value : reference = init_buffer;
-    if (device_results != reference)
-    {
-        log_error("Values do not match. Expected %d obtained %d\n", reference,
-                  device_results);
-        err = -1;
-    }
-    return err;
-template <typename T>
-int test_spec_constant(cl_device_id deviceID, cl_context context,
-                       cl_command_queue queue, const char *name, T init_buffer,
-                       T spec_constant_value, T final_value)
-    if (std::string(name).find("double") != std::string::npos)
-    {
-        if (!is_extension_available(deviceID, "cl_khr_fp64"))
-        {
-            log_info("Extension cl_khr_fp64 not supported; skipping double "
-                     "tests.\n");
-            return TEST_SKIPPED_ITSELF;
-        }
-    }
-    if (std::string(name).find("half") != std::string::npos)
-    {
-        if (!is_extension_available(deviceID, "cl_khr_fp16"))
-        {
-            log_info("Extension cl_khr_fp16 not supported; skipping half "
-                     "tests.\n");
-            return TEST_SKIPPED_ITSELF;
-        }
-    }
-    cl_int err = CL_SUCCESS;
-    err = run_case<T>(deviceID, context, queue, name, init_buffer,
-                      spec_constant_value, final_value, false);
-    err |= run_case<T>(deviceID, context, queue, name, init_buffer,
-                       spec_constant_value, final_value, true);
-    if (err == CL_SUCCESS)
-    {
-        return TEST_PASS;
-    }
-    else
-    {
-        return TEST_FAIL;
-    }
-#define TEST_SPEC_CONSTANT(NAME, type, init_buffer, spec_constant_value)       \
-    TEST_SPIRV_FUNC_VERSION(op_spec_constant_##NAME##_simple, Version(2, 2))   \
-    {                                                                          \
-        type init_value = init_buffer;                                         \
-        type final_value = init_value + spec_constant_value;                   \
-        return test_spec_constant(                                             \
-            deviceID, context, queue, "op_spec_constant_" #NAME "_simple",     \
-            init_value, (type)spec_constant_value, final_value);               \
-    }
-// type name, type, value init, spec constant value
-TEST_SPEC_CONSTANT(uint, cl_uint, 25, 43)
-TEST_SPEC_CONSTANT(uchar, cl_uchar, 19, 4)
-TEST_SPEC_CONSTANT(ushort, cl_ushort, 6000, 3000)
-TEST_SPEC_CONSTANT(ulong, cl_ulong, 9223372036854775000UL, 200)
-TEST_SPEC_CONSTANT(float, cl_float, 1.5, -3.7)
-TEST_SPEC_CONSTANT(half, cl_half, 1, 2)
-TEST_SPEC_CONSTANT(double, cl_double, 14534.53453, 1.53453)
-// Boolean tests
-// documenation: 'If a specialization constant is a boolean
-// constant, spec_value should be a pointer to a cl_uchar value'
-TEST_SPIRV_FUNC_VERSION(op_spec_constant_true_simple, Version(2, 2))
-    // 1-st ndrange init_value is expected value (no change)
-    // 2-nd ndrange sets spec const to 'false' so value = value + 1
-    cl_uchar value = (cl_uchar)7;
-    cl_uchar init_value = value;
-    cl_uchar final_value = value + 1;
-    return test_spec_constant<cl_uchar>(deviceID, context, queue,
-                                        "op_spec_constant_true_simple",
-                                        init_value, 0, final_value);
-TEST_SPIRV_FUNC_VERSION(op_spec_constant_false_simple, Version(2, 2))
-    // 1-st ndrange init_value is expected value (no change)
-    // 2-nd ndrange sets spec const to 'true' so value = value + 1
-    cl_uchar value = (cl_uchar)7;
-    cl_uchar init_value = value;
-    cl_uchar final_value = value + 1;
-    return test_spec_constant<cl_uchar>(deviceID, context, queue,
-                                        "op_spec_constant_false_simple",
-                                        init_value, 1, final_value);
diff --git a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp
index 0a604bc..99d71f7 100644
--- a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp
+++ b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp
@@ -82,11 +82,15 @@
         // Run the cl kernel for reference results
         clProgramWrapper prog;
-        clKernelWrapper kernel;
-        err = create_single_kernel_helper(context, &prog, &kernel, 1,
-                                          &kernelBuf, "vector_times_scalar");
+        err = create_single_kernel_helper_create_program(context, &prog, 1, &kernelBuf, NULL);
         SPIRV_CHECK_ERROR(err, "Failed to create cl program");
+        err = clBuildProgram(prog, 1, &deviceID, NULL, NULL, NULL);
+        SPIRV_CHECK_ERROR(err, "Failed to build program");
+        clKernelWrapper kernel = clCreateKernel(prog, "vector_times_scalar", &err);
+        SPIRV_CHECK_ERROR(err, "Failed to create cl kernel");
         clMemWrapper ref = clCreateBuffer(context, CL_MEM_READ_WRITE, res_bytes, NULL, &err);
         SPIRV_CHECK_ERROR(err, "Failed to create ref buffer");
diff --git a/test_conformance/subgroups/CMakeLists.txt b/test_conformance/subgroups/CMakeLists.txt
index d48af9c..eb6a607 100644
--- a/test_conformance/subgroups/CMakeLists.txt
+++ b/test_conformance/subgroups/CMakeLists.txt
@@ -5,16 +5,8 @@
-    test_subgroup.cpp
+    test_workgroup.cpp
-    test_subgroup_extended_types.cpp
-    subgroup_common_kernels.cpp
-    test_subgroup_non_uniform_vote.cpp
-    test_subgroup_non_uniform_arithmetic.cpp
-    test_subgroup_ballot.cpp
-    test_subgroup_clustered_reduce.cpp
-    test_subgroup_shuffle.cpp
-    test_subgroup_shuffle_relative.cpp
diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp
index 44416dd..f9a9a9d 100644
--- a/test_conformance/subgroups/main.cpp
+++ b/test_conformance/subgroups/main.cpp
@@ -27,19 +27,12 @@
     ADD_TEST_VERSION(sub_group_info_core, Version(2, 1)),
     ADD_TEST_VERSION(work_item_functions_ext, Version(2, 0)),
     ADD_TEST_VERSION(work_item_functions_core, Version(2, 1)),
-    ADD_TEST_VERSION(subgroup_functions_ext, Version(2, 0)),
-    ADD_TEST_VERSION(subgroup_functions_core, Version(2, 1)),
+    ADD_TEST_VERSION(work_group_functions_ext, Version(2, 0)),
+    ADD_TEST_VERSION(work_group_functions_core, Version(2, 1)),
     ADD_TEST_VERSION(barrier_functions_ext, Version(2, 0)),
     ADD_TEST_VERSION(barrier_functions_core, Version(2, 1)),
     ADD_TEST_VERSION(ifp_ext, Version(2, 0)),
-    ADD_TEST_VERSION(ifp_core, Version(2, 1)),
-    ADD_TEST(subgroup_functions_extended_types),
-    ADD_TEST(subgroup_functions_non_uniform_vote),
-    ADD_TEST(subgroup_functions_non_uniform_arithmetic),
-    ADD_TEST(subgroup_functions_ballot),
-    ADD_TEST(subgroup_functions_clustered_reduce),
-    ADD_TEST(subgroup_functions_shuffle),
-    ADD_TEST(subgroup_functions_shuffle_relative)
+    ADD_TEST_VERSION(ifp_core, Version(2, 1))
 const int test_num = ARRAY_SIZE(test_list);
diff --git a/test_conformance/subgroups/procs.h b/test_conformance/subgroups/procs.h
index d09e824..3ebb13b 100644
--- a/test_conformance/subgroups/procs.h
+++ b/test_conformance/subgroups/procs.h
@@ -37,12 +37,14 @@
                                          cl_context context,
                                          cl_command_queue queue,
                                          int num_elements);
-extern int test_subgroup_functions_ext(cl_device_id device, cl_context context,
-                                       cl_command_queue queue,
-                                       int num_elements);
-extern int test_subgroup_functions_core(cl_device_id device, cl_context context,
-                                        cl_command_queue queue,
-                                        int num_elements);
+extern int test_work_group_functions_ext(cl_device_id device,
+                                         cl_context context,
+                                         cl_command_queue queue,
+                                         int num_elements);
+extern int test_work_group_functions_core(cl_device_id device,
+                                          cl_context context,
+                                          cl_command_queue queue,
+                                          int num_elements);
 extern int test_barrier_functions_ext(cl_device_id device, cl_context context,
                                       cl_command_queue queue, int num_elements);
 extern int test_barrier_functions_core(cl_device_id device, cl_context context,
@@ -54,31 +56,5 @@
                         cl_command_queue queue, int num_elements);
 extern int test_ifp_core(cl_device_id device, cl_context context,
                          cl_command_queue queue, int num_elements);
-extern int test_subgroup_functions_extended_types(cl_device_id device,
-                                                  cl_context context,
-                                                  cl_command_queue queue,
-                                                  int num_elements);
-extern int test_subgroup_functions_non_uniform_vote(cl_device_id device,
-                                                    cl_context context,
-                                                    cl_command_queue queue,
-                                                    int num_elements);
-extern int test_subgroup_functions_non_uniform_arithmetic(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int test_subgroup_functions_ballot(cl_device_id device,
-                                          cl_context context,
-                                          cl_command_queue queue,
-                                          int num_elements);
-extern int test_subgroup_functions_clustered_reduce(cl_device_id device,
-                                                    cl_context context,
-                                                    cl_command_queue queue,
-                                                    int num_elements);
-extern int test_subgroup_functions_shuffle(cl_device_id device,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements);
-extern int test_subgroup_functions_shuffle_relative(cl_device_id device,
-                                                    cl_context context,
-                                                    cl_command_queue queue,
-                                                    int num_elements);
 #endif /*_procs_h*/
diff --git a/test_conformance/subgroups/subgroup_common_kernels.cpp b/test_conformance/subgroups/subgroup_common_kernels.cpp
deleted file mode 100644
index f8b2445..0000000
--- a/test_conformance/subgroups/subgroup_common_kernels.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "subgroup_common_kernels.h"
-const char* bcast_source =
-    "__kernel void test_bcast(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint which_sub_group_local_id = xy[gid].z;\n"
-    "    out[gid] = sub_group_broadcast(x, which_sub_group_local_id);\n"
-    "}\n";
-const char* redadd_source = "__kernel void test_redadd(const __global Type "
-                            "*in, __global int4 *xy, __global Type *out)\n"
-                            "{\n"
-                            "    int gid = get_global_id(0);\n"
-                            "    XY(xy,gid);\n"
-                            "    out[gid] = sub_group_reduce_add(in[gid]);\n"
-                            "}\n";
-const char* redmax_source = "__kernel void test_redmax(const __global Type "
-                            "*in, __global int4 *xy, __global Type *out)\n"
-                            "{\n"
-                            "    int gid = get_global_id(0);\n"
-                            "    XY(xy,gid);\n"
-                            "    out[gid] = sub_group_reduce_max(in[gid]);\n"
-                            "}\n";
-const char* redmin_source = "__kernel void test_redmin(const __global Type "
-                            "*in, __global int4 *xy, __global Type *out)\n"
-                            "{\n"
-                            "    int gid = get_global_id(0);\n"
-                            "    XY(xy,gid);\n"
-                            "    out[gid] = sub_group_reduce_min(in[gid]);\n"
-                            "}\n";
-const char* scinadd_source =
-    "__kernel void test_scinadd(const __global Type *in, __global int4 *xy, "
-    "__global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    out[gid] = sub_group_scan_inclusive_add(in[gid]);\n"
-    "}\n";
-const char* scinmax_source =
-    "__kernel void test_scinmax(const __global Type *in, __global int4 *xy, "
-    "__global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    out[gid] = sub_group_scan_inclusive_max(in[gid]);\n"
-    "}\n";
-const char* scinmin_source =
-    "__kernel void test_scinmin(const __global Type *in, __global int4 *xy, "
-    "__global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    out[gid] = sub_group_scan_inclusive_min(in[gid]);\n"
-    "}\n";
-const char* scexadd_source =
-    "__kernel void test_scexadd(const __global Type *in, __global int4 *xy, "
-    "__global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    out[gid] = sub_group_scan_exclusive_add(in[gid]);\n"
-    "}\n";
-const char* scexmax_source =
-    "__kernel void test_scexmax(const __global Type *in, __global int4 *xy, "
-    "__global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    out[gid] = sub_group_scan_exclusive_max(in[gid]);\n"
-    "}\n";
-const char* scexmin_source =
-    "__kernel void test_scexmin(const __global Type *in, __global int4 *xy, "
-    "__global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    out[gid] = sub_group_scan_exclusive_min(in[gid]);\n"
-    "}\n";
diff --git a/test_conformance/subgroups/subgroup_common_kernels.h b/test_conformance/subgroups/subgroup_common_kernels.h
deleted file mode 100644
index 8ae97d9..0000000
--- a/test_conformance/subgroups/subgroup_common_kernels.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "subhelpers.h"
-extern const char* bcast_source;
-extern const char* redadd_source;
-extern const char* redmax_source;
-extern const char* redmin_source;
-extern const char* scinadd_source;
-extern const char* scinmax_source;
-extern const char* scinmin_source;
-extern const char* scexadd_source;
-extern const char* scexmax_source;
-extern const char* scexmin_source;
diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h
deleted file mode 100644
index b30c416..0000000
--- a/test_conformance/subgroups/subgroup_common_templates.h
+++ /dev/null
@@ -1,911 +0,0 @@
-// Copyright (c) 2020 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "typeWrappers.h"
-#include <bitset>
-#include "CL/cl_half.h"
-#include "subhelpers.h"
-#include <set>
-typedef std::bitset<128> bs128;
-static cl_uint4 generate_bit_mask(cl_uint subgroup_local_id,
-                                  const std::string &mask_type,
-                                  cl_uint max_sub_group_size)
-    bs128 mask128;
-    cl_uint4 mask;
-    cl_uint pos = subgroup_local_id;
-    if (mask_type == "eq") mask128.set(pos);
-    if (mask_type == "le" || mask_type == "lt")
-    {
-        for (cl_uint i = 0; i <= pos; i++) mask128.set(i);
-        if (mask_type == "lt") mask128.reset(pos);
-    }
-    if (mask_type == "ge" || mask_type == "gt")
-    {
-        for (cl_uint i = pos; i < max_sub_group_size; i++) mask128.set(i);
-        if (mask_type == "gt") mask128.reset(pos);
-    }
-    // convert std::bitset<128> to uint4
-    auto const uint_mask = bs128{ static_cast<unsigned long>(-1) };
-    mask.s0 = (mask128 & uint_mask).to_ulong();
-    mask128 >>= 32;
-    mask.s1 = (mask128 & uint_mask).to_ulong();
-    mask128 >>= 32;
-    mask.s2 = (mask128 & uint_mask).to_ulong();
-    mask128 >>= 32;
-    mask.s3 = (mask128 & uint_mask).to_ulong();
-    return mask;
-// sub_group_broadcast - each work_item registers it's own value.
-// All work_items in subgroup takes one value from only one (any) work_item
-// sub_group_broadcast_first - same as type 0. All work_items in
-// subgroup takes only one value from only one chosen (the smallest subgroup ID)
-// work_item
-// sub_group_non_uniform_broadcast - same as type 0 but
-// only 4 work_items from subgroup enter the code (are active)
-template <typename Ty, SubgroupsBroadcastOp operation> struct BC
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int i, ii, j, k, n;
-        int ng = test_params.global_workgroup_size;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int nj = (nw + ns - 1) / ns;
-        int d = ns > 100 ? 100 : ns;
-        int non_uniform_size = ng % nw;
-        ng = ng / nw;
-        int last_subgroup_size = 0;
-        ii = 0;
-        log_info("  sub_group_%s(%s)...\n", operation_names(operation),
-                 TypeManager<Ty>::name());
-        if (non_uniform_size)
-        {
-            log_info("  non uniform work group size mode ON\n");
-            ng++;
-        }
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            if (non_uniform_size && k == ng - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, nj, ns, nw,
-                                          last_subgroup_size);
-            }
-            for (j = 0; j < nj; ++j)
-            { // for each subgroup
-                ii = j * ns;
-                if (last_subgroup_size && j == nj - 1)
-                {
-                    n = last_subgroup_size;
-                }
-                else
-                {
-                    n = ii + ns > nw ? nw - ii : ns;
-                }
-                int bcast_if = 0;
-                int bcast_elseif = 0;
-                int bcast_index = (int)(genrand_int32(gMTdata) & 0x7fffffff)
-                    % (d > n ? n : d);
-                // l - calculate subgroup local id from which value will be
-                // broadcasted (one the same value for whole subgroup)
-                if (operation != SubgroupsBroadcastOp::broadcast)
-                {
-                    // reduce brodcasting index in case of non_uniform and
-                    // last workgroup last subgroup
-                    if (last_subgroup_size && j == nj - 1
-                        && last_subgroup_size < NR_OF_ACTIVE_WORK_ITEMS)
-                    {
-                        bcast_if = bcast_index % last_subgroup_size;
-                        bcast_elseif = bcast_if;
-                    }
-                    else
-                    {
-                        bcast_if = bcast_index % NR_OF_ACTIVE_WORK_ITEMS;
-                        bcast_elseif = NR_OF_ACTIVE_WORK_ITEMS
-                            + bcast_index % (n - NR_OF_ACTIVE_WORK_ITEMS);
-                    }
-                }
-                for (i = 0; i < n; ++i)
-                {
-                    if (operation == SubgroupsBroadcastOp::broadcast)
-                    {
-                        int midx = 4 * ii + 4 * i + 2;
-                        m[midx] = (cl_int)bcast_index;
-                    }
-                    else
-                    {
-                        if (i < NR_OF_ACTIVE_WORK_ITEMS)
-                        {
-                            // index of the third
-                            // element int the vector.
-                            int midx = 4 * ii + 4 * i + 2;
-                            // storing information about
-                            // broadcasting index -
-                            // earlier calculated
-                            m[midx] = (cl_int)bcast_if;
-                        }
-                        else
-                        { // index of the third
-                          // element int the vector.
-                            int midx = 4 * ii + 4 * i + 3;
-                            m[midx] = (cl_int)bcast_elseif;
-                        }
-                    }
-                    // calculate value for broadcasting
-                    cl_ulong number = genrand_int64(gMTdata);
-                    set_value(t[ii + i], number);
-                }
-            }
-            // Now map into work group using map from device
-            for (j = 0; j < nw; ++j)
-            { // for each element in work_group
-                // calculate index as number of subgroup
-                // plus subgroup local id
-                x[j] = t[j];
-            }
-            x += nw;
-            m += 4 * nw;
-        }
-    }
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int ii, i, j, k, l, n;
-        int ng = test_params.global_workgroup_size;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int nj = (nw + ns - 1) / ns;
-        Ty tr, rr;
-        int non_uniform_size = ng % nw;
-        ng = ng / nw;
-        int last_subgroup_size = 0;
-        if (non_uniform_size) ng++;
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            if (non_uniform_size && k == ng - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, nj, ns, nw,
-                                          last_subgroup_size);
-            }
-            for (j = 0; j < nw; ++j)
-            { // inside the work_group
-                mx[j] = x[j]; // read host inputs for work_group
-                my[j] = y[j]; // read device outputs for work_group
-            }
-            for (j = 0; j < nj; ++j)
-            { // for each subgroup
-                ii = j * ns;
-                if (last_subgroup_size && j == nj - 1)
-                {
-                    n = last_subgroup_size;
-                }
-                else
-                {
-                    n = ii + ns > nw ? nw - ii : ns;
-                }
-                // Check result
-                if (operation == SubgroupsBroadcastOp::broadcast_first)
-                {
-                    int lowest_active_id = -1;
-                    for (i = 0; i < n; ++i)
-                    {
-                        lowest_active_id = i < NR_OF_ACTIVE_WORK_ITEMS
-                            ? 0
-                            : NR_OF_ACTIVE_WORK_ITEMS;
-                        //  findout if broadcasted
-                        //  value is the same
-                        tr = mx[ii + lowest_active_id];
-                        //  findout if broadcasted to all
-                        rr = my[ii + i];
-                        if (!compare(rr, tr))
-                        {
-                            log_error(
-                                "ERROR: sub_group_broadcast_first(%s) "
-                                "mismatch "
-                                "for local id %d in sub group %d in group "
-                                "%d\n",
-                                TypeManager<Ty>::name(), i, j, k);
-                            return TEST_FAIL;
-                        }
-                    }
-                }
-                else
-                {
-                    for (i = 0; i < n; ++i)
-                    {
-                        if (operation == SubgroupsBroadcastOp::broadcast)
-                        {
-                            int midx = 4 * ii + 4 * i + 2;
-                            l = (int)m[midx];
-                            tr = mx[ii + l];
-                        }
-                        else
-                        {
-                            if (i < NR_OF_ACTIVE_WORK_ITEMS)
-                            { // take index of array where info
-                              // which work_item will be
-                              // broadcast its value is stored
-                                int midx = 4 * ii + 4 * i + 2;
-                                // take subgroup local id of
-                                // this work_item
-                                l = (int)m[midx];
-                                // take value generated on host
-                                // for this work_item
-                                tr = mx[ii + l];
-                            }
-                            else
-                            {
-                                int midx = 4 * ii + 4 * i + 3;
-                                l = (int)m[midx];
-                                tr = mx[ii + l];
-                            }
-                        }
-                        rr = my[ii + i]; // read device outputs for
-                                         // work_item in the subgroup
-                        if (!compare(rr, tr))
-                        {
-                            log_error("ERROR: sub_group_%s(%s) "
-                                      "mismatch for local id %d in sub "
-                                      "group %d in group %d - got %lu "
-                                      "expected %lu\n",
-                                      operation_names(operation),
-                                      TypeManager<Ty>::name(), i, j, k, rr, tr);
-                            return TEST_FAIL;
-                        }
-                    }
-                }
-            }
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-        log_info("  sub_group_%s(%s)... passed\n", operation_names(operation),
-                 TypeManager<Ty>::name());
-        return TEST_PASS;
-    }
-static float to_float(subgroups::cl_half x) { return cl_half_to_float(; }
-static subgroups::cl_half to_half(float x)
-    subgroups::cl_half value;
- = cl_half_from_float(x, CL_HALF_RTE);
-    return value;
-// for integer types
-template <typename Ty> inline Ty calculate(Ty a, Ty b, ArithmeticOp operation)
-    switch (operation)
-    {
-        case ArithmeticOp::add_: return a + b;
-        case ArithmeticOp::max_: return a > b ? a : b;
-        case ArithmeticOp::min_: return a < b ? a : b;
-        case ArithmeticOp::mul_: return a * b;
-        case ArithmeticOp::and_: return a & b;
-        case ArithmeticOp::or_: return a | b;
-        case ArithmeticOp::xor_: return a ^ b;
-        case ArithmeticOp::logical_and: return a && b;
-        case ArithmeticOp::logical_or: return a || b;
-        case ArithmeticOp::logical_xor: return !a ^ !b;
-        default: log_error("Unknown operation request"); break;
-    }
-    return 0;
-// Specialize for floating points.
-template <>
-inline cl_double calculate(cl_double a, cl_double b, ArithmeticOp operation)
-    switch (operation)
-    {
-        case ArithmeticOp::add_: {
-            return a + b;
-        }
-        case ArithmeticOp::max_: {
-            return a > b ? a : b;
-        }
-        case ArithmeticOp::min_: {
-            return a < b ? a : b;
-        }
-        case ArithmeticOp::mul_: {
-            return a * b;
-        }
-        default: log_error("Unknown operation request"); break;
-    }
-    return 0;
-template <>
-inline cl_float calculate(cl_float a, cl_float b, ArithmeticOp operation)
-    switch (operation)
-    {
-        case ArithmeticOp::add_: {
-            return a + b;
-        }
-        case ArithmeticOp::max_: {
-            return a > b ? a : b;
-        }
-        case ArithmeticOp::min_: {
-            return a < b ? a : b;
-        }
-        case ArithmeticOp::mul_: {
-            return a * b;
-        }
-        default: log_error("Unknown operation request"); break;
-    }
-    return 0;
-template <>
-inline subgroups::cl_half calculate(subgroups::cl_half a, subgroups::cl_half b,
-                                    ArithmeticOp operation)
-    switch (operation)
-    {
-        case ArithmeticOp::add_: return to_half(to_float(a) + to_float(b));
-        case ArithmeticOp::max_:
-            return to_float(a) > to_float(b) || is_half_nan( ? a : b;
-        case ArithmeticOp::min_:
-            return to_float(a) < to_float(b) || is_half_nan( ? a : b;
-        case ArithmeticOp::mul_: return to_half(to_float(a) * to_float(b));
-        default: log_error("Unknown operation request"); break;
-    }
-    return to_half(0);
-template <typename Ty> bool is_floating_point()
-    return std::is_floating_point<Ty>::value
-        || std::is_same<Ty, subgroups::cl_half>::value;
-template <typename Ty, ArithmeticOp operation>
-void genrand(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
-    int nj = (nw + ns - 1) / ns;
-    for (int k = 0; k < ng; ++k)
-    {
-        for (int j = 0; j < nj; ++j)
-        {
-            int ii = j * ns;
-            int n = ii + ns > nw ? nw - ii : ns;
-            for (int i = 0; i < n; ++i)
-            {
-                cl_ulong out_value;
-                double y;
-                if (operation == ArithmeticOp::mul_
-                    || operation == ArithmeticOp::add_)
-                {
-                    // work around to avoid overflow, do not use 0 for
-                    // multiplication
-                    out_value = (genrand_int32(gMTdata) % 4) + 1;
-                }
-                else
-                {
-                    out_value = genrand_int64(gMTdata) % (32 * n);
-                    if ((operation == ArithmeticOp::logical_and
-                         || operation == ArithmeticOp::logical_or
-                         || operation == ArithmeticOp::logical_xor)
-                        && ((out_value >> 32) & 1) == 0)
-                        out_value = 0; // increase probability of false
-                }
-                set_value(t[ii + i], out_value);
-            }
-        }
-        // Now map into work group using map from device
-        for (int j = 0; j < nw; ++j)
-        {
-            x[j] = t[j];
-        }
-        x += nw;
-        m += 4 * nw;
-    }
-template <typename Ty, ShuffleOp operation> struct SHF
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int i, ii, j, k, l, n, delta;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        int nj = (nw + ns - 1) / ns;
-        int d = ns > 100 ? 100 : ns;
-        ii = 0;
-        ng = ng / nw;
-        log_info("  sub_group_%s(%s)...\n", operation_names(operation),
-                 TypeManager<Ty>::name());
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            for (j = 0; j < nj; ++j)
-            { // for each subgroup
-                ii = j * ns;
-                n = ii + ns > nw ? nw - ii : ns;
-                for (i = 0; i < n; ++i)
-                {
-                    int midx = 4 * ii + 4 * i + 2;
-                    l = (int)(genrand_int32(gMTdata) & 0x7fffffff)
-                        % (d > n ? n : d);
-                    switch (operation)
-                    {
-                        case ShuffleOp::shuffle:
-                        case ShuffleOp::shuffle_xor:
-                            // storing information about shuffle index
-                            m[midx] = (cl_int)l;
-                            break;
-                        case ShuffleOp::shuffle_up:
-                            delta = l; // calculate delta for shuffle up
-                            if (i - delta < 0)
-                            {
-                                delta = i;
-                            }
-                            m[midx] = (cl_int)delta;
-                            break;
-                        case ShuffleOp::shuffle_down:
-                            delta = l; // calculate delta for shuffle down
-                            if (i + delta >= n)
-                            {
-                                delta = n - 1 - i;
-                            }
-                            m[midx] = (cl_int)delta;
-                            break;
-                        default: break;
-                    }
-                    cl_ulong number = genrand_int64(gMTdata);
-                    set_value(t[ii + i], number);
-                }
-            }
-            // Now map into work group using map from device
-            for (j = 0; j < nw; ++j)
-            { // for each element in work_group
-                x[j] = t[j];
-            }
-            x += nw;
-            m += 4 * nw;
-        }
-    }
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int ii, i, j, k, l, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        int nj = (nw + ns - 1) / ns;
-        Ty tr, rr;
-        ng = ng / nw;
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            for (j = 0; j < nw; ++j)
-            { // inside the work_group
-                mx[j] = x[j]; // read host inputs for work_group
-                my[j] = y[j]; // read device outputs for work_group
-            }
-            for (j = 0; j < nj; ++j)
-            { // for each subgroup
-                ii = j * ns;
-                n = ii + ns > nw ? nw - ii : ns;
-                for (i = 0; i < n; ++i)
-                { // inside the subgroup
-                  // shuffle index storage
-                    int midx = 4 * ii + 4 * i + 2;
-                    l = (int)m[midx];
-                    rr = my[ii + i];
-                    switch (operation)
-                    {
-                        // shuffle basic - treat l as index
-                        case ShuffleOp::shuffle: tr = mx[ii + l]; break;
-                        // shuffle up - treat l as delta
-                        case ShuffleOp::shuffle_up: tr = mx[ii + i - l]; break;
-                        // shuffle up - treat l as delta
-                        case ShuffleOp::shuffle_down:
-                            tr = mx[ii + i + l];
-                            break;
-                        // shuffle xor - treat l as mask
-                        case ShuffleOp::shuffle_xor:
-                            tr = mx[ii + (i ^ l)];
-                            break;
-                        default: break;
-                    }
-                    if (!compare(rr, tr))
-                    {
-                        log_error("ERROR: sub_group_%s(%s) mismatch for "
-                                  "local id %d in sub group %d in group %d\n",
-                                  operation_names(operation),
-                                  TypeManager<Ty>::name(), i, j, k);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-        log_info("  sub_group_%s(%s)... passed\n", operation_names(operation),
-                 TypeManager<Ty>::name());
-        return TEST_PASS;
-    }
-template <typename Ty, ArithmeticOp operation> struct SCEX_NU
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        ng = ng / nw;
-        std::string func_name;
-        work_items_mask ? func_name = "sub_group_non_uniform_scan_exclusive"
-                        : func_name = "sub_group_scan_exclusive";
-        log_info("  %s_%s(%s)...\n", func_name.c_str(),
-                 operation_names(operation), TypeManager<Ty>::name());
-        log_info("  test params: global size = %d local size = %d subgroups "
-                 "size = %d work item mask = 0x%x \n",
-                 test_params.global_workgroup_size, nw, ns, work_items_mask);
-        genrand<Ty, operation>(x, t, m, ns, nw, ng);
-    }
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int ii, i, j, k, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        int nj = (nw + ns - 1) / ns;
-        Ty tr, rr;
-        ng = ng / nw;
-        std::string func_name;
-        work_items_mask ? func_name = "sub_group_non_uniform_scan_exclusive"
-                        : func_name = "sub_group_scan_exclusive";
-        uint32_t use_work_items_mask;
-        // for uniform case take into consideration all workitems
-        use_work_items_mask = !work_items_mask ? 0xFFFFFFFF : work_items_mask;
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            // Map to array indexed to array indexed by local ID and sub group
-            for (j = 0; j < nw; ++j)
-            { // inside the work_group
-                mx[j] = x[j]; // read host inputs for work_group
-                my[j] = y[j]; // read device outputs for work_group
-            }
-            for (j = 0; j < nj; ++j)
-            {
-                ii = j * ns;
-                n = ii + ns > nw ? nw - ii : ns;
-                std::set<int> active_work_items;
-                for (i = 0; i < n; ++i)
-                {
-                    uint32_t check_work_item = 1 << (i % 32);
-                    if (use_work_items_mask & check_work_item)
-                    {
-                        active_work_items.insert(i);
-                    }
-                }
-                if (active_work_items.empty())
-                {
-                    log_info("  No acitve workitems in workgroup id = %d "
-                             "subgroup id = %d - no calculation\n",
-                             k, j);
-                    continue;
-                }
-                else if (active_work_items.size() == 1)
-                {
-                    log_info("  One active workitem in workgroup id = %d "
-                             "subgroup id = %d - no calculation\n",
-                             k, j);
-                    continue;
-                }
-                else
-                {
-                    tr = TypeManager<Ty>::identify_limits(operation);
-                    int idx = 0;
-                    for (const int &active_work_item : active_work_items)
-                    {
-                        rr = my[ii + active_work_item];
-                        if (idx == 0) continue;
-                        if (!compare_ordered(rr, tr))
-                        {
-                            log_error(
-                                "ERROR: %s_%s(%s) "
-                                "mismatch for local id %d in sub group %d in "
-                                "group %d Expected: %d Obtained: %d\n",
-                                func_name.c_str(), operation_names(operation),
-                                TypeManager<Ty>::name(), i, j, k, tr, rr);
-                            return TEST_FAIL;
-                        }
-                        tr = calculate<Ty>(tr, mx[ii + active_work_item],
-                                           operation);
-                        idx++;
-                    }
-                }
-            }
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-        log_info("  %s_%s(%s)... passed\n", func_name.c_str(),
-                 operation_names(operation), TypeManager<Ty>::name());
-        return TEST_PASS;
-    }
-// Test for scan inclusive non uniform functions
-template <typename Ty, ArithmeticOp operation> struct SCIN_NU
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        ng = ng / nw;
-        std::string func_name;
-        work_items_mask ? func_name = "sub_group_non_uniform_scan_inclusive"
-                        : func_name = "sub_group_scan_inclusive";
-        genrand<Ty, operation>(x, t, m, ns, nw, ng);
-        log_info("  %s_%s(%s)...\n", func_name.c_str(),
-                 operation_names(operation), TypeManager<Ty>::name());
-        log_info("  test params: global size = %d local size = %d subgroups "
-                 "size = %d work item mask = 0x%x \n",
-                 test_params.global_workgroup_size, nw, ns, work_items_mask);
-    }
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int ii, i, j, k, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        int nj = (nw + ns - 1) / ns;
-        Ty tr, rr;
-        ng = ng / nw;
-        std::string func_name;
-        work_items_mask ? func_name = "sub_group_non_uniform_scan_inclusive"
-                        : func_name = "sub_group_scan_inclusive";
-        uint32_t use_work_items_mask;
-        // for uniform case take into consideration all workitems
-        use_work_items_mask = !work_items_mask ? 0xFFFFFFFF : work_items_mask;
-        // std::bitset<32> mask32(use_work_items_mask);
-        // for (int k) mask32.count();
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            // Map to array indexed to array indexed by local ID and sub group
-            for (j = 0; j < nw; ++j)
-            { // inside the work_group
-                mx[j] = x[j]; // read host inputs for work_group
-                my[j] = y[j]; // read device outputs for work_group
-            }
-            for (j = 0; j < nj; ++j)
-            {
-                ii = j * ns;
-                n = ii + ns > nw ? nw - ii : ns;
-                std::set<int> active_work_items;
-                int catch_frist_active = -1;
-                for (i = 0; i < n; ++i)
-                {
-                    uint32_t check_work_item = 1 << (i % 32);
-                    if (use_work_items_mask & check_work_item)
-                    {
-                        if (catch_frist_active == -1)
-                        {
-                            catch_frist_active = i;
-                        }
-                        active_work_items.insert(i);
-                    }
-                }
-                if (active_work_items.empty())
-                {
-                    log_info("  No acitve workitems in workgroup id = %d "
-                             "subgroup id = %d - no calculation\n",
-                             k, j);
-                    continue;
-                }
-                else
-                {
-                    tr = TypeManager<Ty>::identify_limits(operation);
-                    for (const int &active_work_item : active_work_items)
-                    {
-                        rr = my[ii + active_work_item];
-                        if (active_work_items.size() == 1)
-                        {
-                            tr = mx[ii + catch_frist_active];
-                        }
-                        else
-                        {
-                            tr = calculate<Ty>(tr, mx[ii + active_work_item],
-                                               operation);
-                        }
-                        if (!compare_ordered<Ty>(rr, tr))
-                        {
-                            log_error(
-                                "ERROR: %s_%s(%s) "
-                                "mismatch for local id %d in sub group %d "
-                                "in "
-                                "group %d Expected: %d Obtained: %d\n",
-                                func_name.c_str(), operation_names(operation),
-                                TypeManager<Ty>::name(), active_work_item, j, k,
-                                tr, rr);
-                            return TEST_FAIL;
-                        }
-                    }
-                }
-            }
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-        log_info("  %s_%s(%s)... passed\n", func_name.c_str(),
-                 operation_names(operation), TypeManager<Ty>::name());
-        return TEST_PASS;
-    }
-// Test for reduce non uniform functions
-template <typename Ty, ArithmeticOp operation> struct RED_NU
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        ng = ng / nw;
-        std::string func_name;
-        work_items_mask ? func_name = "sub_group_non_uniform_reduce"
-                        : func_name = "sub_group_reduce";
-        log_info("  %s_%s(%s)...\n", func_name.c_str(),
-                 operation_names(operation), TypeManager<Ty>::name());
-        log_info("  test params: global size = %d local size = %d subgroups "
-                 "size = %d work item mask = 0x%x \n",
-                 test_params.global_workgroup_size, nw, ns, work_items_mask);
-        genrand<Ty, operation>(x, t, m, ns, nw, ng);
-    }
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int ii, i, j, k, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        int nj = (nw + ns - 1) / ns;
-        ng = ng / nw;
-        Ty tr, rr;
-        std::string func_name;
-        work_items_mask ? func_name = "sub_group_non_uniform_reduce"
-                        : func_name = "sub_group_reduce";
-        for (k = 0; k < ng; ++k)
-        {
-            // Map to array indexed to array indexed by local ID and sub
-            // group
-            for (j = 0; j < nw; ++j)
-            {
-                mx[j] = x[j];
-                my[j] = y[j];
-            }
-            uint32_t use_work_items_mask;
-            use_work_items_mask =
-                !work_items_mask ? 0xFFFFFFFF : work_items_mask;
-            for (j = 0; j < nj; ++j)
-            {
-                ii = j * ns;
-                n = ii + ns > nw ? nw - ii : ns;
-                std::set<int> active_work_items;
-                int catch_frist_active = -1;
-                for (i = 0; i < n; ++i)
-                {
-                    uint32_t check_work_item = 1 << (i % 32);
-                    if (use_work_items_mask & check_work_item)
-                    {
-                        if (catch_frist_active == -1)
-                        {
-                            catch_frist_active = i;
-                            tr = mx[ii + i];
-                            active_work_items.insert(i);
-                            continue;
-                        }
-                        active_work_items.insert(i);
-                        tr = calculate<Ty>(tr, mx[ii + i], operation);
-                    }
-                }
-                if (active_work_items.empty())
-                {
-                    log_info("  No acitve workitems in workgroup id = %d "
-                             "subgroup id = %d - no calculation\n",
-                             k, j);
-                    continue;
-                }
-                for (const int &active_work_item : active_work_items)
-                {
-                    rr = my[ii + active_work_item];
-                    if (!compare_ordered<Ty>(rr, tr))
-                    {
-                        log_error("ERROR: %s_%s(%s) "
-                                  "mismatch for local id %d in sub group %d in "
-                                  "group %d Expected: %d Obtained: %d\n",
-                                  func_name.c_str(), operation_names(operation),
-                                  TypeManager<Ty>::name(), active_work_item, j,
-                                  k, tr, rr);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-        log_info("  %s_%s(%s)... passed\n", func_name.c_str(),
-                 operation_names(operation), TypeManager<Ty>::name());
-        return TEST_PASS;
-    }
diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h
index 93673b3..6e84ccb 100644
--- a/test_conformance/subgroups/subhelpers.h
+++ b/test_conformance/subgroups/subhelpers.h
@@ -19,176 +19,13 @@
 #include "testHarness.h"
 #include "kernelHelpers.h"
 #include "typeWrappers.h"
-#include "imageHelpers.h"
 #include <limits>
 #include <vector>
-#include <type_traits>
-extern MTdata gMTdata;
-struct WorkGroupParams
-    WorkGroupParams(size_t gws, size_t lws,
-                    const std::vector<std::string> &req_ext = {},
-                    const std::vector<uint32_t> &all_wim = {})
-        : global_workgroup_size(gws), local_workgroup_size(lws),
-          required_extensions(req_ext), all_work_item_masks(all_wim)
-    {
-        subgroup_size = 0;
-        work_items_mask = 0;
-        use_core_subgroups = true;
-        dynsc = 0;
-    }
-    size_t global_workgroup_size;
-    size_t local_workgroup_size;
-    size_t subgroup_size;
-    uint32_t work_items_mask;
-    int dynsc;
-    bool use_core_subgroups;
-    std::vector<std::string> required_extensions;
-    std::vector<uint32_t> all_work_item_masks;
-enum class SubgroupsBroadcastOp
-    broadcast,
-    broadcast_first,
-    non_uniform_broadcast
-enum class NonUniformVoteOp
-    elect,
-    all,
-    any,
-    all_equal
-enum class BallotOp
-    ballot,
-    inverse_ballot,
-    ballot_bit_extract,
-    ballot_bit_count,
-    ballot_inclusive_scan,
-    ballot_exclusive_scan,
-    ballot_find_lsb,
-    ballot_find_msb,
-    eq_mask,
-    ge_mask,
-    gt_mask,
-    le_mask,
-    lt_mask,
-enum class ShuffleOp
-    shuffle,
-    shuffle_up,
-    shuffle_down,
-    shuffle_xor
-enum class ArithmeticOp
-    add_,
-    max_,
-    min_,
-    mul_,
-    and_,
-    or_,
-    xor_,
-    logical_and,
-    logical_or,
-    logical_xor
-static const char *const operation_names(ArithmeticOp operation)
-    switch (operation)
-    {
-        case ArithmeticOp::add_: return "add";
-        case ArithmeticOp::max_: return "max";
-        case ArithmeticOp::min_: return "min";
-        case ArithmeticOp::mul_: return "mul";
-        case ArithmeticOp::and_: return "and";
-        case ArithmeticOp::or_: return "or";
-        case ArithmeticOp::xor_: return "xor";
-        case ArithmeticOp::logical_and: return "logical_and";
-        case ArithmeticOp::logical_or: return "logical_or";
-        case ArithmeticOp::logical_xor: return "logical_xor";
-        default: log_error("Unknown operation request"); break;
-    }
-    return "";
-static const char *const operation_names(BallotOp operation)
-    switch (operation)
-    {
-        case BallotOp::ballot: return "ballot";
-        case BallotOp::inverse_ballot: return "inverse_ballot";
-        case BallotOp::ballot_bit_extract: return "bit_extract";
-        case BallotOp::ballot_bit_count: return "bit_count";
-        case BallotOp::ballot_inclusive_scan: return "inclusive_scan";
-        case BallotOp::ballot_exclusive_scan: return "exclusive_scan";
-        case BallotOp::ballot_find_lsb: return "find_lsb";
-        case BallotOp::ballot_find_msb: return "find_msb";
-        case BallotOp::eq_mask: return "eq";
-        case BallotOp::ge_mask: return "ge";
-        case BallotOp::gt_mask: return "gt";
-        case BallotOp::le_mask: return "le";
-        case BallotOp::lt_mask: return "lt";
-        default: log_error("Unknown operation request"); break;
-    }
-    return "";
-static const char *const operation_names(ShuffleOp operation)
-    switch (operation)
-    {
-        case ShuffleOp::shuffle: return "shuffle";
-        case ShuffleOp::shuffle_up: return "shuffle_up";
-        case ShuffleOp::shuffle_down: return "shuffle_down";
-        case ShuffleOp::shuffle_xor: return "shuffle_xor";
-        default: log_error("Unknown operation request"); break;
-    }
-    return "";
-static const char *const operation_names(NonUniformVoteOp operation)
-    switch (operation)
-    {
-        case NonUniformVoteOp::all: return "all";
-        case NonUniformVoteOp::all_equal: return "all_equal";
-        case NonUniformVoteOp::any: return "any";
-        case NonUniformVoteOp::elect: return "elect";
-        default: log_error("Unknown operation request"); break;
-    }
-    return "";
-static const char *const operation_names(SubgroupsBroadcastOp operation)
-    switch (operation)
-    {
-        case SubgroupsBroadcastOp::broadcast: return "broadcast";
-        case SubgroupsBroadcastOp::broadcast_first: return "broadcast_first";
-        case SubgroupsBroadcastOp::non_uniform_broadcast:
-            return "non_uniform_broadcast";
-        default: log_error("Unknown operation request"); break;
-    }
-    return "";
 class subgroupsAPI {
-    subgroupsAPI(cl_platform_id platform, bool use_core_subgroups)
+    subgroupsAPI(cl_platform_id platform, bool useCoreSubgroups)
                           == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
@@ -196,7 +33,7 @@
                           == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
                       "Enums have to be the same");
-        if (use_core_subgroups)
+        if (useCoreSubgroups)
             _clGetKernelSubGroupInfo_ptr = &clGetKernelSubGroupInfo;
             clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfo";
@@ -219,76 +56,163 @@
     clGetKernelSubGroupInfoKHR_fn _clGetKernelSubGroupInfo_ptr;
-// Need to defined custom type for vector size = 3 and half type. This is
-// because of 3-component types are otherwise indistinguishable from the
-// 4-component types, and because the half type is indistinguishable from some
-// other 16-bit type (ushort)
-namespace subgroups {
-struct cl_char3
+// Some template helpers
+template <typename Ty> struct TypeName;
+template <> struct TypeName<cl_half>
-    ::cl_char3 data;
+    static const char *val() { return "half"; }
-struct cl_uchar3
+template <> struct TypeName<cl_uint>
-    ::cl_uchar3 data;
+    static const char *val() { return "uint"; }
-struct cl_short3
+template <> struct TypeName<cl_int>
-    ::cl_short3 data;
+    static const char *val() { return "int"; }
-struct cl_ushort3
+template <> struct TypeName<cl_ulong>
-    ::cl_ushort3 data;
+    static const char *val() { return "ulong"; }
-struct cl_int3
+template <> struct TypeName<cl_long>
-    ::cl_int3 data;
+    static const char *val() { return "long"; }
-struct cl_uint3
+template <> struct TypeName<float>
-    ::cl_uint3 data;
+    static const char *val() { return "float"; }
-struct cl_long3
+template <> struct TypeName<double>
-    ::cl_long3 data;
+    static const char *val() { return "double"; }
-struct cl_ulong3
+template <typename Ty> struct TypeDef;
+template <> struct TypeDef<cl_half>
-    ::cl_ulong3 data;
+    static const char *val() { return "typedef half Type;\n"; }
-struct cl_float3
+template <> struct TypeDef<cl_uint>
-    ::cl_float3 data;
+    static const char *val() { return "typedef uint Type;\n"; }
-struct cl_double3
+template <> struct TypeDef<cl_int>
-    ::cl_double3 data;
+    static const char *val() { return "typedef int Type;\n"; }
-struct cl_half
+template <> struct TypeDef<cl_ulong>
-    ::cl_half data;
+    static const char *val() { return "typedef ulong Type;\n"; }
-struct cl_half2
+template <> struct TypeDef<cl_long>
-    ::cl_half2 data;
+    static const char *val() { return "typedef long Type;\n"; }
-struct cl_half3
+template <> struct TypeDef<float>
-    ::cl_half3 data;
+    static const char *val() { return "typedef float Type;\n"; }
-struct cl_half4
+template <> struct TypeDef<double>
-    ::cl_half4 data;
+    static const char *val() { return "typedef double Type;\n"; }
-struct cl_half8
+template <typename Ty, int Which> struct TypeIdentity;
+// template <> struct TypeIdentity<cl_half,0> { static cl_half val() { return
+// (cl_half)0.0; } }; template <> struct TypeIdentity<cl_half,0> { static
+// cl_half val() { return -(cl_half)65536.0; } }; template <> struct
+// TypeIdentity<cl_half,0> { static cl_half val() { return (cl_half)65536.0; }
+// };
+template <> struct TypeIdentity<cl_uint, 0>
-    ::cl_half8 data;
+    static cl_uint val() { return (cl_uint)0; }
-struct cl_half16
+template <> struct TypeIdentity<cl_uint, 1>
-    ::cl_half16 data;
+    static cl_uint val() { return (cl_uint)0; }
+template <> struct TypeIdentity<cl_uint, 2>
+    static cl_uint val() { return (cl_uint)0xffffffff; }
+template <> struct TypeIdentity<cl_int, 0>
+    static cl_int val() { return (cl_int)0; }
+template <> struct TypeIdentity<cl_int, 1>
+    static cl_int val() { return (cl_int)0x80000000; }
+template <> struct TypeIdentity<cl_int, 2>
+    static cl_int val() { return (cl_int)0x7fffffff; }
+template <> struct TypeIdentity<cl_ulong, 0>
+    static cl_ulong val() { return (cl_ulong)0; }
+template <> struct TypeIdentity<cl_ulong, 1>
+    static cl_ulong val() { return (cl_ulong)0; }
+template <> struct TypeIdentity<cl_ulong, 2>
+    static cl_ulong val() { return (cl_ulong)0xffffffffffffffffULL; }
+template <> struct TypeIdentity<cl_long, 0>
+    static cl_long val() { return (cl_long)0; }
+template <> struct TypeIdentity<cl_long, 1>
+    static cl_long val() { return (cl_long)0x8000000000000000ULL; }
+template <> struct TypeIdentity<cl_long, 2>
+    static cl_long val() { return (cl_long)0x7fffffffffffffffULL; }
+template <> struct TypeIdentity<float, 0>
+    static float val() { return 0.F; }
+template <> struct TypeIdentity<float, 1>
+    static float val() { return -std::numeric_limits<float>::infinity(); }
+template <> struct TypeIdentity<float, 2>
+    static float val() { return std::numeric_limits<float>::infinity(); }
+template <> struct TypeIdentity<double, 0>
+    static double val() { return 0.L; }
+template <> struct TypeIdentity<double, 1>
+    static double val() { return -std::numeric_limits<double>::infinity(); }
+template <> struct TypeIdentity<double, 2>
+    static double val() { return std::numeric_limits<double>::infinity(); }
+template <typename Ty> struct TypeCheck;
+template <> struct TypeCheck<cl_uint>
+    static bool val(cl_device_id) { return true; }
+template <> struct TypeCheck<cl_int>
+    static bool val(cl_device_id) { return true; }
 static bool int64_ok(cl_device_id device)
@@ -309,860 +233,43 @@
     return true;
-static bool double_ok(cl_device_id device)
+template <> struct TypeCheck<cl_ulong>
-    int error;
-    cl_device_fp_config c;
-    error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c),
-                            (void *)&c, NULL);
-    if (error)
+    static bool val(cl_device_id device) { return int64_ok(device); }
+template <> struct TypeCheck<cl_long>
+    static bool val(cl_device_id device) { return int64_ok(device); }
+template <> struct TypeCheck<cl_float>
+    static bool val(cl_device_id) { return true; }
+template <> struct TypeCheck<cl_half>
+    static bool val(cl_device_id device)
-        log_info("clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n");
-        return false;
+        return is_extension_available(device, "cl_khr_fp16");
-    return c != 0;
-static bool half_ok(cl_device_id device)
+template <> struct TypeCheck<double>
-    int error;
-    cl_device_fp_config c;
-    error = clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG, sizeof(c),
-                            (void *)&c, NULL);
-    if (error)
+    static bool val(cl_device_id device)
-        log_info("clGetDeviceInfo failed with CL_DEVICE_HALF_FP_CONFIG\n");
-        return false;
-    }
-    return c != 0;
-template <typename Ty> struct CommonTypeManager
-    static const char *name() { return ""; }
-    static const char *add_typedef() { return "\n"; }
-    typedef std::false_type is_vector_type;
-    typedef std::false_type is_sb_vector_size3;
-    typedef std::false_type is_sb_vector_type;
-    typedef std::false_type is_sb_scalar_type;
-    static const bool type_supported(cl_device_id) { return true; }
-    static const Ty identify_limits(ArithmeticOp operation)
-    {
-        switch (operation)
+        int error;
+        cl_device_fp_config c;
+        error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c),
+                                (void *)&c, NULL);
+        if (error)
-            case ArithmeticOp::add_: return (Ty)0;
-            case ArithmeticOp::max_: return (std::numeric_limits<Ty>::min)();
-            case ArithmeticOp::min_: return (std::numeric_limits<Ty>::max)();
-            case ArithmeticOp::mul_: return (Ty)1;
-            case ArithmeticOp::and_: return (Ty)~0;
-            case ArithmeticOp::or_: return (Ty)0;
-            case ArithmeticOp::xor_: return (Ty)0;
-            default: log_error("Unknown operation request"); break;
-        }
-        return 0;
-    }
-template <typename> struct TypeManager;
-template <> struct TypeManager<cl_int> : public CommonTypeManager<cl_int>
-    static const char *name() { return "int"; }
-    static const char *add_typedef() { return "typedef int Type;\n"; }
-    static cl_int identify_limits(ArithmeticOp operation)
-    {
-        switch (operation)
-        {
-            case ArithmeticOp::add_: return (cl_int)0;
-            case ArithmeticOp::max_:
-                return (std::numeric_limits<cl_int>::min)();
-            case ArithmeticOp::min_:
-                return (std::numeric_limits<cl_int>::max)();
-            case ArithmeticOp::mul_: return (cl_int)1;
-            case ArithmeticOp::and_: return (cl_int)~0;
-            case ArithmeticOp::or_: return (cl_int)0;
-            case ArithmeticOp::xor_: return (cl_int)0;
-            case ArithmeticOp::logical_and: return (cl_int)1;
-            case ArithmeticOp::logical_or: return (cl_int)0;
-            case ArithmeticOp::logical_xor: return (cl_int)0;
-            default: log_error("Unknown operation request"); break;
-        }
-        return 0;
-    }
-template <> struct TypeManager<cl_int2> : public CommonTypeManager<cl_int2>
-    static const char *name() { return "int2"; }
-    static const char *add_typedef() { return "typedef int2 Type;\n"; }
-    typedef std::true_type is_vector_type;
-    using scalar_type = cl_int;
-template <>
-struct TypeManager<subgroups::cl_int3>
-    : public CommonTypeManager<subgroups::cl_int3>
-    static const char *name() { return "int3"; }
-    static const char *add_typedef() { return "typedef int3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_int;
-template <> struct TypeManager<cl_int4> : public CommonTypeManager<cl_int4>
-    static const char *name() { return "int4"; }
-    static const char *add_typedef() { return "typedef int4 Type;\n"; }
-    using scalar_type = cl_int;
-    typedef std::true_type is_vector_type;
-template <> struct TypeManager<cl_int8> : public CommonTypeManager<cl_int8>
-    static const char *name() { return "int8"; }
-    static const char *add_typedef() { return "typedef int8 Type;\n"; }
-    using scalar_type = cl_int;
-    typedef std::true_type is_vector_type;
-template <> struct TypeManager<cl_int16> : public CommonTypeManager<cl_int16>
-    static const char *name() { return "int16"; }
-    static const char *add_typedef() { return "typedef int16 Type;\n"; }
-    using scalar_type = cl_int;
-    typedef std::true_type is_vector_type;
-// cl_uint
-template <> struct TypeManager<cl_uint> : public CommonTypeManager<cl_uint>
-    static const char *name() { return "uint"; }
-    static const char *add_typedef() { return "typedef uint Type;\n"; }
-template <> struct TypeManager<cl_uint2> : public CommonTypeManager<cl_uint2>
-    static const char *name() { return "uint2"; }
-    static const char *add_typedef() { return "typedef uint2 Type;\n"; }
-    using scalar_type = cl_uint;
-    typedef std::true_type is_vector_type;
-template <>
-struct TypeManager<subgroups::cl_uint3>
-    : public CommonTypeManager<subgroups::cl_uint3>
-    static const char *name() { return "uint3"; }
-    static const char *add_typedef() { return "typedef uint3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_uint;
-template <> struct TypeManager<cl_uint4> : public CommonTypeManager<cl_uint4>
-    static const char *name() { return "uint4"; }
-    static const char *add_typedef() { return "typedef uint4 Type;\n"; }
-    using scalar_type = cl_uint;
-    typedef std::true_type is_vector_type;
-template <> struct TypeManager<cl_uint8> : public CommonTypeManager<cl_uint8>
-    static const char *name() { return "uint8"; }
-    static const char *add_typedef() { return "typedef uint8 Type;\n"; }
-    using scalar_type = cl_uint;
-    typedef std::true_type is_vector_type;
-template <> struct TypeManager<cl_uint16> : public CommonTypeManager<cl_uint16>
-    static const char *name() { return "uint16"; }
-    static const char *add_typedef() { return "typedef uint16 Type;\n"; }
-    using scalar_type = cl_uint;
-    typedef std::true_type is_vector_type;
-// cl_short
-template <> struct TypeManager<cl_short> : public CommonTypeManager<cl_short>
-    static const char *name() { return "short"; }
-    static const char *add_typedef() { return "typedef short Type;\n"; }
-template <> struct TypeManager<cl_short2> : public CommonTypeManager<cl_short2>
-    static const char *name() { return "short2"; }
-    static const char *add_typedef() { return "typedef short2 Type;\n"; }
-    using scalar_type = cl_short;
-    typedef std::true_type is_vector_type;
-template <>
-struct TypeManager<subgroups::cl_short3>
-    : public CommonTypeManager<subgroups::cl_short3>
-    static const char *name() { return "short3"; }
-    static const char *add_typedef() { return "typedef short3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_short;
-template <> struct TypeManager<cl_short4> : public CommonTypeManager<cl_short4>
-    static const char *name() { return "short4"; }
-    static const char *add_typedef() { return "typedef short4 Type;\n"; }
-    using scalar_type = cl_short;
-    typedef std::true_type is_vector_type;
-template <> struct TypeManager<cl_short8> : public CommonTypeManager<cl_short8>
-    static const char *name() { return "short8"; }
-    static const char *add_typedef() { return "typedef short8 Type;\n"; }
-    using scalar_type = cl_short;
-    typedef std::true_type is_vector_type;
-template <>
-struct TypeManager<cl_short16> : public CommonTypeManager<cl_short16>
-    static const char *name() { return "short16"; }
-    static const char *add_typedef() { return "typedef short16 Type;\n"; }
-    using scalar_type = cl_short;
-    typedef std::true_type is_vector_type;
-// cl_ushort
-template <> struct TypeManager<cl_ushort> : public CommonTypeManager<cl_ushort>
-    static const char *name() { return "ushort"; }
-    static const char *add_typedef() { return "typedef ushort Type;\n"; }
-template <>
-struct TypeManager<cl_ushort2> : public CommonTypeManager<cl_ushort2>
-    static const char *name() { return "ushort2"; }
-    static const char *add_typedef() { return "typedef ushort2 Type;\n"; }
-    using scalar_type = cl_ushort;
-    typedef std::true_type is_vector_type;
-template <>
-struct TypeManager<subgroups::cl_ushort3>
-    : public CommonTypeManager<subgroups::cl_ushort3>
-    static const char *name() { return "ushort3"; }
-    static const char *add_typedef() { return "typedef ushort3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_ushort;
-template <>
-struct TypeManager<cl_ushort4> : public CommonTypeManager<cl_ushort4>
-    static const char *name() { return "ushort4"; }
-    static const char *add_typedef() { return "typedef ushort4 Type;\n"; }
-    using scalar_type = cl_ushort;
-    typedef std::true_type is_vector_type;
-template <>
-struct TypeManager<cl_ushort8> : public CommonTypeManager<cl_ushort8>
-    static const char *name() { return "ushort8"; }
-    static const char *add_typedef() { return "typedef ushort8 Type;\n"; }
-    using scalar_type = cl_ushort;
-    typedef std::true_type is_vector_type;
-template <>
-struct TypeManager<cl_ushort16> : public CommonTypeManager<cl_ushort16>
-    static const char *name() { return "ushort16"; }
-    static const char *add_typedef() { return "typedef ushort16 Type;\n"; }
-    using scalar_type = cl_ushort;
-    typedef std::true_type is_vector_type;
-// cl_char
-template <> struct TypeManager<cl_char> : public CommonTypeManager<cl_char>
-    static const char *name() { return "char"; }
-    static const char *add_typedef() { return "typedef char Type;\n"; }
-template <> struct TypeManager<cl_char2> : public CommonTypeManager<cl_char2>
-    static const char *name() { return "char2"; }
-    static const char *add_typedef() { return "typedef char2 Type;\n"; }
-    using scalar_type = cl_char;
-    typedef std::true_type is_vector_type;
-template <>
-struct TypeManager<subgroups::cl_char3>
-    : public CommonTypeManager<subgroups::cl_char3>
-    static const char *name() { return "char3"; }
-    static const char *add_typedef() { return "typedef char3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_char;
-template <> struct TypeManager<cl_char4> : public CommonTypeManager<cl_char4>
-    static const char *name() { return "char4"; }
-    static const char *add_typedef() { return "typedef char4 Type;\n"; }
-    using scalar_type = cl_char;
-    typedef std::true_type is_vector_type;
-template <> struct TypeManager<cl_char8> : public CommonTypeManager<cl_char8>
-    static const char *name() { return "char8"; }
-    static const char *add_typedef() { return "typedef char8 Type;\n"; }
-    using scalar_type = cl_char;
-    typedef std::true_type is_vector_type;
-template <> struct TypeManager<cl_char16> : public CommonTypeManager<cl_char16>
-    static const char *name() { return "char16"; }
-    static const char *add_typedef() { return "typedef char16 Type;\n"; }
-    using scalar_type = cl_char;
-    typedef std::true_type is_vector_type;
-// cl_uchar
-template <> struct TypeManager<cl_uchar> : public CommonTypeManager<cl_uchar>
-    static const char *name() { return "uchar"; }
-    static const char *add_typedef() { return "typedef uchar Type;\n"; }
-template <> struct TypeManager<cl_uchar2> : public CommonTypeManager<cl_uchar2>
-    static const char *name() { return "uchar2"; }
-    static const char *add_typedef() { return "typedef uchar2 Type;\n"; }
-    using scalar_type = cl_uchar;
-    typedef std::true_type is_vector_type;
-template <>
-struct TypeManager<subgroups::cl_uchar3>
-    : public CommonTypeManager<subgroups::cl_char3>
-    static const char *name() { return "uchar3"; }
-    static const char *add_typedef() { return "typedef uchar3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_uchar;
-template <> struct TypeManager<cl_uchar4> : public CommonTypeManager<cl_uchar4>
-    static const char *name() { return "uchar4"; }
-    static const char *add_typedef() { return "typedef uchar4 Type;\n"; }
-    using scalar_type = cl_uchar;
-    typedef std::true_type is_vector_type;
-template <> struct TypeManager<cl_uchar8> : public CommonTypeManager<cl_uchar8>
-    static const char *name() { return "uchar8"; }
-    static const char *add_typedef() { return "typedef uchar8 Type;\n"; }
-    using scalar_type = cl_uchar;
-    typedef std::true_type is_vector_type;
-template <>
-struct TypeManager<cl_uchar16> : public CommonTypeManager<cl_uchar16>
-    static const char *name() { return "uchar16"; }
-    static const char *add_typedef() { return "typedef uchar16 Type;\n"; }
-    using scalar_type = cl_uchar;
-    typedef std::true_type is_vector_type;
-// cl_long
-template <> struct TypeManager<cl_long> : public CommonTypeManager<cl_long>
-    static const char *name() { return "long"; }
-    static const char *add_typedef() { return "typedef long Type;\n"; }
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-template <> struct TypeManager<cl_long2> : public CommonTypeManager<cl_long2>
-    static const char *name() { return "long2"; }
-    static const char *add_typedef() { return "typedef long2 Type;\n"; }
-    using scalar_type = cl_long;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-template <>
-struct TypeManager<subgroups::cl_long3>
-    : public CommonTypeManager<subgroups::cl_long3>
-    static const char *name() { return "long3"; }
-    static const char *add_typedef() { return "typedef long3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_long;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-template <> struct TypeManager<cl_long4> : public CommonTypeManager<cl_long4>
-    static const char *name() { return "long4"; }
-    static const char *add_typedef() { return "typedef long4 Type;\n"; }
-    using scalar_type = cl_long;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-template <> struct TypeManager<cl_long8> : public CommonTypeManager<cl_long8>
-    static const char *name() { return "long8"; }
-    static const char *add_typedef() { return "typedef long8 Type;\n"; }
-    using scalar_type = cl_long;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-template <> struct TypeManager<cl_long16> : public CommonTypeManager<cl_long16>
-    static const char *name() { return "long16"; }
-    static const char *add_typedef() { return "typedef long16 Type;\n"; }
-    using scalar_type = cl_long;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-// cl_ulong
-template <> struct TypeManager<cl_ulong> : public CommonTypeManager<cl_ulong>
-    static const char *name() { return "ulong"; }
-    static const char *add_typedef() { return "typedef ulong Type;\n"; }
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-template <> struct TypeManager<cl_ulong2> : public CommonTypeManager<cl_ulong2>
-    static const char *name() { return "ulong2"; }
-    static const char *add_typedef() { return "typedef ulong2 Type;\n"; }
-    using scalar_type = cl_ulong;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-template <>
-struct TypeManager<subgroups::cl_ulong3>
-    : public CommonTypeManager<subgroups::cl_ulong3>
-    static const char *name() { return "ulong3"; }
-    static const char *add_typedef() { return "typedef ulong3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_ulong;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-template <> struct TypeManager<cl_ulong4> : public CommonTypeManager<cl_ulong4>
-    static const char *name() { return "ulong4"; }
-    static const char *add_typedef() { return "typedef ulong4 Type;\n"; }
-    using scalar_type = cl_ulong;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-template <> struct TypeManager<cl_ulong8> : public CommonTypeManager<cl_ulong8>
-    static const char *name() { return "ulong8"; }
-    static const char *add_typedef() { return "typedef ulong8 Type;\n"; }
-    using scalar_type = cl_ulong;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-template <>
-struct TypeManager<cl_ulong16> : public CommonTypeManager<cl_ulong16>
-    static const char *name() { return "ulong16"; }
-    static const char *add_typedef() { return "typedef ulong16 Type;\n"; }
-    using scalar_type = cl_ulong;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return int64_ok(device);
-    }
-// cl_float
-template <> struct TypeManager<cl_float> : public CommonTypeManager<cl_float>
-    static const char *name() { return "float"; }
-    static const char *add_typedef() { return "typedef float Type;\n"; }
-    static cl_float identify_limits(ArithmeticOp operation)
-    {
-        switch (operation)
-        {
-            case ArithmeticOp::add_: return 0.0f;
-            case ArithmeticOp::max_:
-                return -std::numeric_limits<float>::infinity();
-            case ArithmeticOp::min_:
-                return std::numeric_limits<float>::infinity();
-            case ArithmeticOp::mul_: return (cl_float)1;
-            default: log_error("Unknown operation request"); break;
-        }
-        return 0;
-    }
-template <> struct TypeManager<cl_float2> : public CommonTypeManager<cl_float2>
-    static const char *name() { return "float2"; }
-    static const char *add_typedef() { return "typedef float2 Type;\n"; }
-    using scalar_type = cl_float;
-    typedef std::true_type is_vector_type;
-template <>
-struct TypeManager<subgroups::cl_float3>
-    : public CommonTypeManager<subgroups::cl_float3>
-    static const char *name() { return "float3"; }
-    static const char *add_typedef() { return "typedef float3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_float;
-template <> struct TypeManager<cl_float4> : public CommonTypeManager<cl_float4>
-    static const char *name() { return "float4"; }
-    static const char *add_typedef() { return "typedef float4 Type;\n"; }
-    using scalar_type = cl_float;
-    typedef std::true_type is_vector_type;
-template <> struct TypeManager<cl_float8> : public CommonTypeManager<cl_float8>
-    static const char *name() { return "float8"; }
-    static const char *add_typedef() { return "typedef float8 Type;\n"; }
-    using scalar_type = cl_float;
-    typedef std::true_type is_vector_type;
-template <>
-struct TypeManager<cl_float16> : public CommonTypeManager<cl_float16>
-    static const char *name() { return "float16"; }
-    static const char *add_typedef() { return "typedef float16 Type;\n"; }
-    using scalar_type = cl_float;
-    typedef std::true_type is_vector_type;
-// cl_double
-template <> struct TypeManager<cl_double> : public CommonTypeManager<cl_double>
-    static const char *name() { return "double"; }
-    static const char *add_typedef() { return "typedef double Type;\n"; }
-    static cl_double identify_limits(ArithmeticOp operation)
-    {
-        switch (operation)
-        {
-            case ArithmeticOp::add_: return 0.0;
-            case ArithmeticOp::max_:
-                return -std::numeric_limits<double>::infinity();
-            case ArithmeticOp::min_:
-                return std::numeric_limits<double>::infinity();
-            case ArithmeticOp::mul_: return (cl_double)1;
-            default: log_error("Unknown operation request"); break;
-        }
-        return 0;
-    }
-    static const bool type_supported(cl_device_id device)
-    {
-        return double_ok(device);
-    }
-template <>
-struct TypeManager<cl_double2> : public CommonTypeManager<cl_double2>
-    static const char *name() { return "double2"; }
-    static const char *add_typedef() { return "typedef double2 Type;\n"; }
-    using scalar_type = cl_double;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return double_ok(device);
-    }
-template <>
-struct TypeManager<subgroups::cl_double3>
-    : public CommonTypeManager<subgroups::cl_double3>
-    static const char *name() { return "double3"; }
-    static const char *add_typedef() { return "typedef double3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = cl_double;
-    static const bool type_supported(cl_device_id device)
-    {
-        return double_ok(device);
-    }
-template <>
-struct TypeManager<cl_double4> : public CommonTypeManager<cl_double4>
-    static const char *name() { return "double4"; }
-    static const char *add_typedef() { return "typedef double4 Type;\n"; }
-    using scalar_type = cl_double;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return double_ok(device);
-    }
-template <>
-struct TypeManager<cl_double8> : public CommonTypeManager<cl_double8>
-    static const char *name() { return "double8"; }
-    static const char *add_typedef() { return "typedef double8 Type;\n"; }
-    using scalar_type = cl_double;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return double_ok(device);
-    }
-template <>
-struct TypeManager<cl_double16> : public CommonTypeManager<cl_double16>
-    static const char *name() { return "double16"; }
-    static const char *add_typedef() { return "typedef double16 Type;\n"; }
-    using scalar_type = cl_double;
-    typedef std::true_type is_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return double_ok(device);
-    }
-// cl_half
-template <>
-struct TypeManager<subgroups::cl_half>
-    : public CommonTypeManager<subgroups::cl_half>
-    static const char *name() { return "half"; }
-    static const char *add_typedef() { return "typedef half Type;\n"; }
-    typedef std::true_type is_sb_scalar_type;
-    static subgroups::cl_half identify_limits(ArithmeticOp operation)
-    {
-        switch (operation)
-        {
-            case ArithmeticOp::add_: return { 0x0000 };
-            case ArithmeticOp::max_: return { 0xfc00 };
-            case ArithmeticOp::min_: return { 0x7c00 };
-            case ArithmeticOp::mul_: return { 0x3c00 };
-            default: log_error("Unknown operation request"); break;
-        }
-        return { 0 };
-    }
-    static const bool type_supported(cl_device_id device)
-    {
-        return half_ok(device);
-    }
-template <>
-struct TypeManager<subgroups::cl_half2>
-    : public CommonTypeManager<subgroups::cl_half2>
-    static const char *name() { return "half2"; }
-    static const char *add_typedef() { return "typedef half2 Type;\n"; }
-    using scalar_type = subgroups::cl_half;
-    typedef std::true_type is_sb_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return half_ok(device);
-    }
-template <>
-struct TypeManager<subgroups::cl_half3>
-    : public CommonTypeManager<subgroups::cl_half3>
-    static const char *name() { return "half3"; }
-    static const char *add_typedef() { return "typedef half3 Type;\n"; }
-    typedef std::true_type is_sb_vector_size3;
-    using scalar_type = subgroups::cl_half;
-    static const bool type_supported(cl_device_id device)
-    {
-        return half_ok(device);
-    }
-template <>
-struct TypeManager<subgroups::cl_half4>
-    : public CommonTypeManager<subgroups::cl_half4>
-    static const char *name() { return "half4"; }
-    static const char *add_typedef() { return "typedef half4 Type;\n"; }
-    using scalar_type = subgroups::cl_half;
-    typedef std::true_type is_sb_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return half_ok(device);
-    }
-template <>
-struct TypeManager<subgroups::cl_half8>
-    : public CommonTypeManager<subgroups::cl_half8>
-    static const char *name() { return "half8"; }
-    static const char *add_typedef() { return "typedef half8 Type;\n"; }
-    using scalar_type = subgroups::cl_half;
-    typedef std::true_type is_sb_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return half_ok(device);
-    }
-template <>
-struct TypeManager<subgroups::cl_half16>
-    : public CommonTypeManager<subgroups::cl_half16>
-    static const char *name() { return "half16"; }
-    static const char *add_typedef() { return "typedef half16 Type;\n"; }
-    using scalar_type = subgroups::cl_half;
-    typedef std::true_type is_sb_vector_type;
-    static const bool type_supported(cl_device_id device)
-    {
-        return half_ok(device);
-    }
-// set scalar value to vector of halfs
-template <typename Ty, int N = 0>
-typename std::enable_if<TypeManager<Ty>::is_sb_vector_type::value>::type
-set_value(Ty &lhs, const cl_ulong &rhs)
-    const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
-    for (auto i = 0; i < size; ++i)
-    {
-[i] = rhs;
-    }
-// set scalar value to vector
-template <typename Ty>
-typename std::enable_if<TypeManager<Ty>::is_vector_type::value>::type
-set_value(Ty &lhs, const cl_ulong &rhs)
-    const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
-    for (auto i = 0; i < size; ++i)
-    {
-        lhs.s[i] = rhs;
-    }
-// set vector to vector value
-template <typename Ty>
-typename std::enable_if<TypeManager<Ty>::is_vector_type::value>::type
-set_value(Ty &lhs, const Ty &rhs)
-    lhs = rhs;
-// set scalar value to vector size 3
-template <typename Ty, int N = 0>
-typename std::enable_if<TypeManager<Ty>::is_sb_vector_size3::value>::type
-set_value(Ty &lhs, const cl_ulong &rhs)
-    for (auto i = 0; i < 3; ++i)
-    {
-[i] = rhs;
-    }
-// set scalar value to scalar
-template <typename Ty>
-typename std::enable_if<std::is_scalar<Ty>::value>::type
-set_value(Ty &lhs, const cl_ulong &rhs)
-    lhs = static_cast<Ty>(rhs);
-// set scalar value to half scalar
-template <typename Ty>
-typename std::enable_if<TypeManager<Ty>::is_sb_scalar_type::value>::type
-set_value(Ty &lhs, const cl_ulong &rhs)
- = rhs;
-// compare for common vectors
-template <typename Ty>
-typename std::enable_if<TypeManager<Ty>::is_vector_type::value, bool>::type
-compare(const Ty &lhs, const Ty &rhs)
-    const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
-    for (auto i = 0; i < size; ++i)
-    {
-        if (lhs.s[i] != rhs.s[i])
-        {
+            log_info(
+                "clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n");
             return false;
+        return c != 0;
-    return true;
-// compare for vectors 3
-template <typename Ty>
-typename std::enable_if<TypeManager<Ty>::is_sb_vector_size3::value, bool>::type
-compare(const Ty &lhs, const Ty &rhs)
-    for (auto i = 0; i < 3; ++i)
-    {
-        if ([i] !=[i])
-        {
-            return false;
-        }
-    }
-    return true;
-// compare for half vectors
-template <typename Ty>
-typename std::enable_if<TypeManager<Ty>::is_sb_vector_type::value, bool>::type
-compare(const Ty &lhs, const Ty &rhs)
-    const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
-    for (auto i = 0; i < size; ++i)
-    {
-        if ([i] !=[i])
-        {
-            return false;
-        }
-    }
-    return true;
-// compare for scalars
-template <typename Ty>
-typename std::enable_if<std::is_scalar<Ty>::value, bool>::type
-compare(const Ty &lhs, const Ty &rhs)
-    return lhs == rhs;
-// compare for scalar halfs
-template <typename Ty>
-typename std::enable_if<TypeManager<Ty>::is_sb_scalar_type::value, bool>::type
-compare(const Ty &lhs, const Ty &rhs)
-    return ==;
-template <typename Ty> inline bool compare_ordered(const Ty &lhs, const Ty &rhs)
-    return lhs == rhs;
-template <>
-inline bool compare_ordered(const subgroups::cl_half &lhs,
-                            const subgroups::cl_half &rhs)
-    return cl_half_to_float( == cl_half_to_float(;
-template <typename Ty>
-inline bool compare_ordered(const subgroups::cl_half &lhs, const int &rhs)
-    return cl_half_to_float( == rhs;
 // Run a test kernel to compute the result of a built-in on an input
 static int run_kernel(cl_context context, cl_command_queue queue,
@@ -1211,9 +318,6 @@
     test_error(error, "clEnqueueWriteBuffer failed");
-    error = clEnqueueWriteBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL,
-                                 NULL);
-    test_error(error, "clEnqueueWriteBuffer failed");
     error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0,
                                    NULL, NULL);
     test_error(error, "clEnqueueNDRangeKernel failed");
@@ -1233,114 +337,61 @@
 // Driver for testing a single built in function
-template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
+template <typename Ty, typename Fns, size_t GSIZE, size_t LSIZE,
+          size_t TSIZE = 0>
+struct test
-    static int mrun(cl_device_id device, cl_context context,
-                    cl_command_queue queue, int num_elements, const char *kname,
-                    const char *src, WorkGroupParams test_params)
-    {
-        int error = TEST_PASS;
-        for (auto &mask : test_params.all_work_item_masks)
-        {
-            test_params.work_items_mask = mask;
-            error |= run(device, context, queue, num_elements, kname, src,
-                         test_params);
-        }
-        return error;
-    };
     static int run(cl_device_id device, cl_context context,
                    cl_command_queue queue, int num_elements, const char *kname,
-                   const char *src, WorkGroupParams test_params)
+                   const char *src, int dynscl, bool useCoreSubgroups)
         size_t tmp;
         int error;
         int subgroup_size, num_subgroups;
         size_t realSize;
-        size_t global = test_params.global_workgroup_size;
-        size_t local = test_params.local_workgroup_size;
+        size_t global;
+        size_t local;
         clProgramWrapper program;
         clKernelWrapper kernel;
         cl_platform_id platform;
-        std::vector<cl_int> sgmap;
-        sgmap.resize(4 * global);
-        std::vector<Ty> mapin;
-        mapin.resize(local);
-        std::vector<Ty> mapout;
-        mapout.resize(local);
-        std::stringstream kernel_sstr;
-        if (test_params.work_items_mask != 0)
-        {
-            kernel_sstr << "#define WORK_ITEMS_MASK ";
-            kernel_sstr << "0x" << std::hex << test_params.work_items_mask
-                        << "\n";
-        }
+        cl_int sgmap[2 * GSIZE];
+        Ty mapin[LSIZE];
+        Ty mapout[LSIZE];
-        kernel_sstr << "#define NR_OF_ACTIVE_WORK_ITEMS ";
-        kernel_sstr << NR_OF_ACTIVE_WORK_ITEMS << "\n";
         // Make sure a test of type Ty is supported by the device
-        if (!TypeManager<Ty>::type_supported(device))
-        {
-            log_info("Data type not supported : %s\n", TypeManager<Ty>::name());
-            return 0;
-        }
-        else
-        {
-            if (strstr(TypeManager<Ty>::name(), "double"))
-            {
-                kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
-            }
-            else if (strstr(TypeManager<Ty>::name(), "half"))
-            {
-                kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp16: enable\n";
-            }
-        }
-        for (std::string extension : test_params.required_extensions)
-        {
-            if (!is_extension_available(device, extension.c_str()))
-            {
-                log_info("The extension %s not supported on this device. SKIP "
-                         "testing - kernel %s data type %s\n",
-                         extension.c_str(), kname, TypeManager<Ty>::name());
-                return TEST_PASS;
-            }
-            kernel_sstr << "#pragma OPENCL EXTENSION " + extension
-                    + ": enable\n";
-        }
+        if (!TypeCheck<Ty>::val(device)) return 0;
         error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
                                 (void *)&platform, NULL);
         test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM");
-        if (test_params.use_core_subgroups)
+        std::stringstream kernel_sstr;
+        if (useCoreSubgroups)
                 << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
         kernel_sstr << "#define XY(M,I) M[I].x = get_sub_group_local_id(); "
                        "M[I].y = get_sub_group_id();\n";
-        kernel_sstr << TypeManager<Ty>::add_typedef();
+        kernel_sstr << TypeDef<Ty>::val();
         kernel_sstr << src;
         const std::string &kernel_str = kernel_sstr.str();
         const char *kernel_src = kernel_str.c_str();
-        error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                            &kernel_src, kname);
+        error = create_single_kernel_helper_with_build_options(
+            context, &program, &kernel, 1, &kernel_src, kname, "-cl-std=CL2.0");
         if (error != 0) return error;
         // Determine some local dimensions to use for the test.
-        error = get_max_common_work_group_size(
-            context, kernel, test_params.global_workgroup_size, &local);
+        global = GSIZE;
+        error = get_max_common_work_group_size(context, kernel, GSIZE, &local);
         test_error(error, "get_max_common_work_group_size failed");
         // Limit it a bit so we have muliple work groups
-        // Ideally this will still be large enough to give us multiple
-        if (local > test_params.local_workgroup_size)
-            local = test_params.local_workgroup_size;
+        // Ideally this will still be large enough to give us multiple subgroups
+        if (local > LSIZE) local = LSIZE;
         // Get the sub group info
-        subgroupsAPI subgroupsApiSet(platform, test_params.use_core_subgroups);
+        subgroupsAPI subgroupsApiSet(platform, useCoreSubgroups);
         clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr =
         if (clGetKernelSubGroupInfo_ptr == NULL)
@@ -1384,9 +435,8 @@
         std::vector<Ty> idata;
         std::vector<Ty> odata;
-        size_t input_array_size = global;
-        size_t output_array_size = global;
-        int dynscl = test_params.dynsc;
+        size_t input_array_size = GSIZE;
+        size_t output_array_size = GSIZE;
         if (dynscl != 0)
@@ -1399,96 +449,28 @@
         // Run the kernel once on zeroes to get the map
-        memset(, 0, input_array_size * sizeof(Ty));
-        error = run_kernel(context, queue, kernel, global, local,,
-                           input_array_size * sizeof(Ty),,
-                           global * sizeof(cl_int4),,
+        memset(&idata[0], 0, input_array_size * sizeof(Ty));
+        error = run_kernel(context, queue, kernel, global, local, &idata[0],
+                           input_array_size * sizeof(Ty), sgmap,
+                           global * sizeof(cl_int) * 2, &odata[0],
                            output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
-        test_error(error, "Running kernel first time failed");
+        if (error) return error;
         // Generate the desired input for the kernel
+        Fns::gen(&idata[0], mapin, sgmap, subgroup_size, (int)local,
+                 (int)global / (int)local);
-        test_params.subgroup_size = subgroup_size;
-        Fns::gen(,,, test_params);
-        error = run_kernel(context, queue, kernel, global, local,,
-                           input_array_size * sizeof(Ty),,
-                           global * sizeof(cl_int4),,
+        error = run_kernel(context, queue, kernel, global, local, &idata[0],
+                           input_array_size * sizeof(Ty), sgmap,
+                           global * sizeof(cl_int) * 2, &odata[0],
                            output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
-        test_error(error, "Running kernel second time failed");
+        if (error) return error;
         // Check the result
-        error = Fns::chk(,,,
-               ,, test_params);
-        test_error(error, "Data verification failed");
-        return TEST_PASS;
+        return Fns::chk(&idata[0], &odata[0], mapin, mapout, sgmap,
+                        subgroup_size, (int)local, (int)global / (int)local);
-static void set_last_workgroup_params(int non_uniform_size,
-                                      int &number_of_subgroups,
-                                      int subgroup_size, int &workgroup_size,
-                                      int &last_subgroup_size)
-    number_of_subgroups = 1 + non_uniform_size / subgroup_size;
-    last_subgroup_size = non_uniform_size % subgroup_size;
-    workgroup_size = non_uniform_size;
-template <typename Ty>
-static void set_randomdata_for_subgroup(Ty *workgroup, int wg_offset,
-                                        int current_sbs)
-    int randomize_data = (int)(genrand_int32(gMTdata) % 3);
-    // Initialize data matrix indexed by local id and sub group id
-    switch (randomize_data)
-    {
-        case 0:
-            memset(&workgroup[wg_offset], 0, current_sbs * sizeof(Ty));
-            break;
-        case 1: {
-            memset(&workgroup[wg_offset], 0, current_sbs * sizeof(Ty));
-            int wi_id = (int)(genrand_int32(gMTdata) % (cl_uint)current_sbs);
-            set_value(workgroup[wg_offset + wi_id], 41);
-        }
-        break;
-        case 2:
-            memset(&workgroup[wg_offset], 0xff, current_sbs * sizeof(Ty));
-            break;
-    }
-struct RunTestForType
-    RunTestForType(cl_device_id device, cl_context context,
-                   cl_command_queue queue, int num_elements,
-                   WorkGroupParams test_params)
-        : device_(device), context_(context), queue_(queue),
-          num_elements_(num_elements), test_params_(test_params)
-    {}
-    template <typename T, typename U>
-    int run_impl(const char *kernel_name, const char *source)
-    {
-        int error = TEST_PASS;
-        if (test_params_.all_work_item_masks.size() > 0)
-        {
-            error = test<T, U>::mrun(device_, context_, queue_, num_elements_,
-                                     kernel_name, source, test_params_);
-        }
-        else
-        {
-            error = test<T, U>::run(device_, context_, queue_, num_elements_,
-                                    kernel_name, source, test_params_);
-        }
-        return error;
-    }
-    cl_device_id device_;
-    cl_context context_;
-    cl_command_queue queue_;
-    int num_elements_;
-    WorkGroupParams test_params_;
diff --git a/test_conformance/subgroups/test_barrier.cpp b/test_conformance/subgroups/test_barrier.cpp
index 47e42f6..e6ce1d2 100644
--- a/test_conformance/subgroups/test_barrier.cpp
+++ b/test_conformance/subgroups/test_barrier.cpp
@@ -59,15 +59,10 @@
 // barrier test functions
 template <int Which> struct BAR
-    static void gen(cl_int *x, cl_int *t, cl_int *m,
-                    const WorkGroupParams &test_params)
+    static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng)
         int i, ii, j, k, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
         int nj = (nw + ns - 1) / ns;
-        ng = ng / nw;
         int e;
         ii = 0;
@@ -84,7 +79,8 @@
             // Now map into work group using map from device
             for (j = 0; j < nw; ++j)
-                x[j] = t[j];
+                i = m[2 * j + 1] * ns + m[2 * j];
+                x[j] = t[i];
             x += nw;
@@ -93,14 +89,10 @@
     static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m,
-                   const WorkGroupParams &test_params)
+                   int ns, int nw, int ng)
         int ii, i, j, k, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
         int nj = (nw + ns - 1) / ns;
-        ng = ng / nw;
         cl_int tr, rr;
         if (Which == 0)
@@ -113,8 +105,9 @@
             // Map to array indexed to array indexed by local ID and sub group
             for (j = 0; j < nw; ++j)
-                mx[j] = x[j];
-                my[j] = y[j];
+                i = m[2 * j + 1] * ns + m[2 * j];
+                mx[i] = x[j];
+                my[i] = y[j];
             for (j = 0; j < nj; ++j)
@@ -130,9 +123,8 @@
                     if (tr != rr)
                         log_error("ERROR: sub_group_barrier mismatch for local "
-                                  "id %d in sub group %d in group %d expected "
-                                  "%d got %d\n",
-                                  i, j, k, tr, rr);
+                                  "id %d in sub group %d in group %d\n",
+                                  i, j, k);
                         return -1;
@@ -152,18 +144,18 @@
                            cl_command_queue queue, int num_elements,
                            bool useCoreSubgroups)
-    int error = TEST_PASS;
+    int error;
     // Adjust these individually below if desired/needed
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size);
-    test_params.use_core_subgroups = useCoreSubgroups;
-    error = test<cl_int, BAR<0>>::run(device, context, queue, num_elements,
-                                      "test_lbar", lbar_source, test_params);
-    error |= test<cl_int, BAR<1>, global_work_size>::run(
-        device, context, queue, num_elements, "test_gbar", gbar_source,
-        test_params);
+#define G 2000
+#define L 200
+    error = test<cl_int, BAR<0>, G, L>::run(device, context, queue,
+                                            num_elements, "test_lbar",
+                                            lbar_source, 0, useCoreSubgroups);
+    error = test<cl_int, BAR<1>, G, L, G>::run(
+        device, context, queue, num_elements, "test_gbar", gbar_source, 0,
+        useCoreSubgroups);
     return error;
diff --git a/test_conformance/subgroups/test_ifp.cpp b/test_conformance/subgroups/test_ifp.cpp
index 428f2cd..02850e5 100644
--- a/test_conformance/subgroups/test_ifp.cpp
+++ b/test_conformance/subgroups/test_ifp.cpp
@@ -46,7 +46,7 @@
     "#define INST_COUNT 0x3\n"
     "__kernel void\n"
-    "test_ifp(const __global int *in, __global int4 *xy, __global int *out)\n"
+    "test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n"
     "    __local atomic_int loc[NUM_LOC];\n"
@@ -225,15 +225,10 @@
 struct IFP
-    static void gen(cl_int *x, cl_int *t, cl_int *,
-                    const WorkGroupParams &test_params)
+    static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng)
         int k;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
         int nj = (nw + ns - 1) / ns;
-        ng = ng / nw;
         // We need at least 2 sub groups per group for this test
         if (nj == 1) return;
@@ -245,15 +240,11 @@
-    static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *,
-                   const WorkGroupParams &test_params)
+    static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, int ns,
+                   int nw, int ng)
         int i, k;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
         int nj = (nw + ns - 1) / ns;
-        ng = ng / nw;
         // We need at least 2 sub groups per group for this tes
         if (nj == 1) return 0;
@@ -284,17 +275,14 @@
 int test_ifp(cl_device_id device, cl_context context, cl_command_queue queue,
              int num_elements, bool useCoreSubgroups)
-    int error = TEST_PASS;
+    int error;
-    // Global/local work group sizes
     // Adjust these individually below if desired/needed
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size);
-    test_params.use_core_subgroups = useCoreSubgroups;
-    test_params.dynsc = NUM_LOC + 1;
-    error = test<cl_int, IFP>::run(device, context, queue, num_elements,
-                                   "test_ifp", ifp_source, test_params);
+#define G 2000
+#define L 200
+    error = test<cl_int, IFP, G, L>::run(device, context, queue, num_elements,
+                                         "test_ifp", ifp_source, NUM_LOC + 1,
+                                         useCoreSubgroups);
     return error;
diff --git a/test_conformance/subgroups/test_queries.cpp b/test_conformance/subgroups/test_queries.cpp
index 761ca7a..2ad3d7f 100644
--- a/test_conformance/subgroups/test_queries.cpp
+++ b/test_conformance/subgroups/test_queries.cpp
@@ -67,8 +67,9 @@
     const std::string &kernel_str = kernel_sstr.str();
     const char *kernel_src = kernel_str.c_str();
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        &kernel_src, "query_kernel");
+    error = create_single_kernel_helper_with_build_options(
+        context, &program, &kernel, 1, &kernel_src, "query_kernel",
+        "-cl-std=CL2.0");
     if (error != 0) return error;
     // Determine some local dimensions to use for the test.
diff --git a/test_conformance/subgroups/test_subgroup.cpp b/test_conformance/subgroups/test_subgroup.cpp
deleted file mode 100644
index c0e4952..0000000
--- a/test_conformance/subgroups/test_subgroup.cpp
+++ /dev/null
@@ -1,217 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "procs.h"
-#include "subhelpers.h"
-#include "subgroup_common_kernels.h"
-#include "subgroup_common_templates.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-namespace {
-// Any/All test functions
-template <NonUniformVoteOp operation> struct AA
-    static void gen(cl_int *x, cl_int *t, cl_int *m,
-                    const WorkGroupParams &test_params)
-    {
-        int i, ii, j, k, n;
-        int ng = test_params.global_workgroup_size;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int nj = (nw + ns - 1) / ns;
-        int e;
-        ng = ng / nw;
-        ii = 0;
-        log_info("  sub_group_%s...\n", operation_names(operation));
-        for (k = 0; k < ng; ++k)
-        {
-            for (j = 0; j < nj; ++j)
-            {
-                ii = j * ns;
-                n = ii + ns > nw ? nw - ii : ns;
-                e = (int)(genrand_int32(gMTdata) % 3);
-                // Initialize data matrix indexed by local id and sub group id
-                switch (e)
-                {
-                    case 0: memset(&t[ii], 0, n * sizeof(cl_int)); break;
-                    case 1:
-                        memset(&t[ii], 0, n * sizeof(cl_int));
-                        i = (int)(genrand_int32(gMTdata) % (cl_uint)n);
-                        t[ii + i] = 41;
-                        break;
-                    case 2: memset(&t[ii], 0xff, n * sizeof(cl_int)); break;
-                }
-            }
-            // Now map into work group using map from device
-            for (j = 0; j < nw; ++j)
-            {
-                x[j] = t[j];
-            }
-            x += nw;
-            m += 4 * nw;
-        }
-    }
-    static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int ii, i, j, k, n;
-        int ng = test_params.global_workgroup_size;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int nj = (nw + ns - 1) / ns;
-        cl_int taa, raa;
-        ng = ng / nw;
-        for (k = 0; k < ng; ++k)
-        {
-            // Map to array indexed to array indexed by local ID and sub group
-            for (j = 0; j < nw; ++j)
-            {
-                mx[j] = x[j];
-                my[j] = y[j];
-            }
-            for (j = 0; j < nj; ++j)
-            {
-                ii = j * ns;
-                n = ii + ns > nw ? nw - ii : ns;
-                // Compute target
-                if (operation == NonUniformVoteOp::any)
-                {
-                    taa = 0;
-                    for (i = 0; i < n; ++i) taa |= mx[ii + i] != 0;
-                }
-                if (operation == NonUniformVoteOp::all)
-                {
-                    taa = 1;
-                    for (i = 0; i < n; ++i) taa &= mx[ii + i] != 0;
-                }
-                // Check result
-                for (i = 0; i < n; ++i)
-                {
-                    raa = my[ii + i] != 0;
-                    if (raa != taa)
-                    {
-                        log_error("ERROR: sub_group_%s mismatch for local id "
-                                  "%d in sub group %d in group %d\n",
-                                  operation_names(operation), i, j, k);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-        log_info("  sub_group_%s... passed\n", operation_names(operation));
-        return TEST_PASS;
-    }
-static const char *any_source = "__kernel void test_any(const __global Type "
-                                "*in, __global int4 *xy, __global Type *out)\n"
-                                "{\n"
-                                "    int gid = get_global_id(0);\n"
-                                "    XY(xy,gid);\n"
-                                "    out[gid] = sub_group_any(in[gid]);\n"
-                                "}\n";
-static const char *all_source = "__kernel void test_all(const __global Type "
-                                "*in, __global int4 *xy, __global Type *out)\n"
-                                "{\n"
-                                "    int gid = get_global_id(0);\n"
-                                "    XY(xy,gid);\n"
-                                "    out[gid] = sub_group_all(in[gid]);\n"
-                                "}\n";
-template <typename T>
-int run_broadcast_scan_reduction_for_type(RunTestForType rft)
-    int error = rft.run_impl<T, BC<T, SubgroupsBroadcastOp::broadcast>>(
-        "test_bcast", bcast_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>("test_redadd",
-                                                            redadd_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>("test_redmax",
-                                                            redmax_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>("test_redmin",
-                                                            redmin_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>("test_scinadd",
-                                                             scinadd_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>("test_scinmax",
-                                                             scinmax_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>("test_scinmin",
-                                                             scinmin_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>("test_scexadd",
-                                                             scexadd_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>("test_scexmax",
-                                                             scexmax_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>("test_scexmin",
-                                                             scexmin_source);
-    return error;
-// Entry point from main
-int test_subgroup_functions(cl_device_id device, cl_context context,
-                            cl_command_queue queue, int num_elements,
-                            bool useCoreSubgroups)
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-    int error =
-        rft.run_impl<cl_int, AA<NonUniformVoteOp::any>>("test_any", any_source);
-    error |=
-        rft.run_impl<cl_int, AA<NonUniformVoteOp::all>>("test_all", all_source);
-    error |= run_broadcast_scan_reduction_for_type<cl_int>(rft);
-    error |= run_broadcast_scan_reduction_for_type<cl_uint>(rft);
-    error |= run_broadcast_scan_reduction_for_type<cl_long>(rft);
-    error |= run_broadcast_scan_reduction_for_type<cl_ulong>(rft);
-    error |= run_broadcast_scan_reduction_for_type<cl_float>(rft);
-    error |= run_broadcast_scan_reduction_for_type<cl_double>(rft);
-    error |= run_broadcast_scan_reduction_for_type<subgroups::cl_half>(rft);
-    return error;
-int test_subgroup_functions_core(cl_device_id device, cl_context context,
-                                 cl_command_queue queue, int num_elements)
-    return test_subgroup_functions(device, context, queue, num_elements, true);
-int test_subgroup_functions_ext(cl_device_id device, cl_context context,
-                                cl_command_queue queue, int num_elements)
-    bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
-    if (!hasExtension)
-    {
-        log_info(
-            "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-    return test_subgroup_functions(device, context, queue, num_elements, false);
diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp
deleted file mode 100644
index f2e4060..0000000
--- a/test_conformance/subgroups/test_subgroup_ballot.cpp
+++ /dev/null
@@ -1,1089 +0,0 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "procs.h"
-#include "subhelpers.h"
-#include "subgroup_common_templates.h"
-#include "harness/typeWrappers.h"
-#include <bitset>
-namespace {
-// Test for ballot functions
-template <typename Ty> struct BALLOT
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        // no work here
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int non_uniform_size = gws % lws;
-        log_info("  sub_group_ballot...\n");
-        if (non_uniform_size)
-        {
-            log_info("  non uniform work group size mode ON\n");
-        }
-    }
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int wi_id, wg_id, sb_id;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        int current_sbs = 0;
-        cl_uint expected_result, device_result;
-        int non_uniform_size = gws % lws;
-        int wg_number = gws / lws;
-        wg_number = non_uniform_size ? wg_number + 1 : wg_number;
-        int last_subgroup_size = 0;
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            if (non_uniform_size && wg_id == wg_number - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws,
-                                          last_subgroup_size);
-            }
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            { // inside the work_group
-                // read device outputs for work_group
-                my[wi_id] = y[wi_id];
-            }
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            { // for each subgroup
-                int wg_offset = sb_id * sbs;
-                if (last_subgroup_size && sb_id == sb_number - 1)
-                {
-                    current_sbs = last_subgroup_size;
-                }
-                else
-                {
-                    current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-                }
-                for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                {
-                    device_result = my[wg_offset + wi_id];
-                    expected_result = 1;
-                    if (!compare(device_result, expected_result))
-                    {
-                        log_error(
-                            "ERROR: sub_group_ballot mismatch for local id "
-                            "%d in sub group %d in group %d obtained {%d}, "
-                            "expected {%d} \n",
-                            wi_id, sb_id, wg_id, device_result,
-                            expected_result);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-            y += lws;
-            m += 4 * lws;
-        }
-        log_info("  sub_group_ballot... passed\n");
-        return TEST_PASS;
-    }
-// Test for bit extract ballot functions
-template <typename Ty, BallotOp operation> struct BALLOT_BIT_EXTRACT
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int wi_id, sb_id, wg_id, l;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        int wg_number = gws / lws;
-        int limit_sbs = sbs > 100 ? 100 : sbs;
-        int non_uniform_size = gws % lws;
-        log_info("  sub_group_%s(%s)...\n", operation_names(operation),
-                 TypeManager<Ty>::name());
-        if (non_uniform_size)
-        {
-            log_info("  non uniform work group size mode ON\n");
-        }
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            { // for each subgroup
-                int wg_offset = sb_id * sbs;
-                int current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-                // rand index to bit extract
-                int index_for_odd = (int)(genrand_int32(gMTdata) & 0x7fffffff)
-                    % (limit_sbs > current_sbs ? current_sbs : limit_sbs);
-                int index_for_even = (int)(genrand_int32(gMTdata) & 0x7fffffff)
-                    % (limit_sbs > current_sbs ? current_sbs : limit_sbs);
-                for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                {
-                    // index of the third element int the vector.
-                    int midx = 4 * wg_offset + 4 * wi_id + 2;
-                    // storing information about index to bit extract
-                    m[midx] = (cl_int)index_for_odd;
-                    m[++midx] = (cl_int)index_for_even;
-                }
-                set_randomdata_for_subgroup<Ty>(t, wg_offset, current_sbs);
-            }
-            // Now map into work group using map from device
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            {
-                x[wi_id] = t[wi_id];
-            }
-            x += lws;
-            m += 4 * lws;
-        }
-    }
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int wi_id, wg_id, l, sb_id;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        int wg_number = gws / lws;
-        cl_uint4 expected_result, device_result;
-        int last_subgroup_size = 0;
-        int current_sbs = 0;
-        int non_uniform_size = gws % lws;
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            if (non_uniform_size && wg_id == wg_number - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws,
-                                          last_subgroup_size);
-            }
-            // Map to array indexed to array indexed by local ID and sub group
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            { // inside the work_group
-                // read host inputs for work_group
-                mx[wi_id] = x[wi_id];
-                // read device outputs for work_group
-                my[wi_id] = y[wi_id];
-            }
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            { // for each subgroup
-                int wg_offset = sb_id * sbs;
-                if (last_subgroup_size && sb_id == sb_number - 1)
-                {
-                    current_sbs = last_subgroup_size;
-                }
-                else
-                {
-                    current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-                }
-                // take index of array where info which work_item will
-                // be broadcast its value is stored
-                int midx = 4 * wg_offset + 2;
-                // take subgroup local id of this work_item
-                int index_for_odd = (int)m[midx];
-                int index_for_even = (int)m[++midx];
-                for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                { // for each subgroup
-                    int bit_value = 0;
-                    // from which value of bitfield bit
-                    // verification will be done
-                    int take_shift =
-                        (wi_id & 1) ? index_for_odd % 32 : index_for_even % 32;
-                    int bit_mask = 1 << take_shift;
-                    if (wi_id < 32)
-                        (mx[wg_offset + wi_id].s0 & bit_mask) > 0
-                            ? bit_value = 1
-                            : bit_value = 0;
-                    if (wi_id >= 32 && wi_id < 64)
-                        (mx[wg_offset + wi_id].s1 & bit_mask) > 0
-                            ? bit_value = 1
-                            : bit_value = 0;
-                    if (wi_id >= 64 && wi_id < 96)
-                        (mx[wg_offset + wi_id].s2 & bit_mask) > 0
-                            ? bit_value = 1
-                            : bit_value = 0;
-                    if (wi_id >= 96 && wi_id < 128)
-                        (mx[wg_offset + wi_id].s3 & bit_mask) > 0
-                            ? bit_value = 1
-                            : bit_value = 0;
-                    if (wi_id & 1)
-                    {
-                        bit_value ? expected_result = { 1, 0, 0, 1 }
-                                  : expected_result = { 0, 0, 0, 1 };
-                    }
-                    else
-                    {
-                        bit_value ? expected_result = { 1, 0, 0, 2 }
-                                  : expected_result = { 0, 0, 0, 2 };
-                    }
-                    device_result = my[wg_offset + wi_id];
-                    if (!compare(device_result, expected_result))
-                    {
-                        log_error(
-                            "ERROR: sub_group_%s mismatch for local id %d in "
-                            "sub group %d in group %d obtained {%d, %d, %d, "
-                            "%d}, expected {%d, %d, %d, %d}\n",
-                            operation_names(operation), wi_id, sb_id, wg_id,
-                            device_result.s0, device_result.s1,
-                            device_result.s2, device_result.s3,
-                            expected_result.s0, expected_result.s1,
-                            expected_result.s2, expected_result.s3);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-            x += lws;
-            y += lws;
-            m += 4 * lws;
-        }
-        log_info("  sub_group_%s(%s)... passed\n", operation_names(operation),
-                 TypeManager<Ty>::name());
-        return TEST_PASS;
-    }
-template <typename Ty, BallotOp operation> struct BALLOT_INVERSE
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int non_uniform_size = gws % lws;
-        log_info("  sub_group_inverse_ballot...\n");
-        if (non_uniform_size)
-        {
-            log_info("  non uniform work group size mode ON\n");
-        }
-        // no work here
-    }
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int wi_id, wg_id, sb_id;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        cl_uint4 expected_result, device_result;
-        int non_uniform_size = gws % lws;
-        int wg_number = gws / lws;
-        int last_subgroup_size = 0;
-        int current_sbs = 0;
-        if (non_uniform_size) wg_number++;
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            if (non_uniform_size && wg_id == wg_number - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws,
-                                          last_subgroup_size);
-            }
-            // Map to array indexed to array indexed by local ID and sub group
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            { // inside the work_group
-                mx[wi_id] = x[wi_id]; // read host inputs for work_group
-                my[wi_id] = y[wi_id]; // read device outputs for work_group
-            }
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            { // for each subgroup
-                int wg_offset = sb_id * sbs;
-                if (last_subgroup_size && sb_id == sb_number - 1)
-                {
-                    current_sbs = last_subgroup_size;
-                }
-                else
-                {
-                    current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-                }
-                // take index of array where info which work_item will
-                // be broadcast its value is stored
-                int midx = 4 * wg_offset + 2;
-                // take subgroup local id of this work_item
-                // Check result
-                for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                { // for each subgroup work item
-                    wi_id & 1 ? expected_result = { 1, 0, 0, 1 }
-                              : expected_result = { 1, 0, 0, 2 };
-                    device_result = my[wg_offset + wi_id];
-                    if (!compare(device_result, expected_result))
-                    {
-                        log_error(
-                            "ERROR: sub_group_%s mismatch for local id %d in "
-                            "sub group %d in group %d obtained {%d, %d, %d, "
-                            "%d}, expected {%d, %d, %d, %d}\n",
-                            operation_names(operation), wi_id, sb_id, wg_id,
-                            device_result.s0, device_result.s1,
-                            device_result.s2, device_result.s3,
-                            expected_result.s0, expected_result.s1,
-                            expected_result.s2, expected_result.s3);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-            x += lws;
-            y += lws;
-            m += 4 * lws;
-        }
-        log_info("  sub_group_inverse_ballot... passed\n");
-        return TEST_PASS;
-    }
-// Test for bit count/inclusive and exclusive scan/ find lsb msb ballot function
-template <typename Ty, BallotOp operation> struct BALLOT_COUNT_SCAN_FIND
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int wi_id, wg_id, sb_id;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        int non_uniform_size = gws % lws;
-        int wg_number = gws / lws;
-        int last_subgroup_size = 0;
-        int current_sbs = 0;
-        log_info("  sub_group_%s(%s)...\n", operation_names(operation),
-                 TypeManager<Ty>::name());
-        if (non_uniform_size)
-        {
-            log_info("  non uniform work group size mode ON\n");
-            wg_number++;
-        }
-        int e;
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            if (non_uniform_size && wg_id == wg_number - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws,
-                                          last_subgroup_size);
-            }
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            { // for each subgroup
-                int wg_offset = sb_id * sbs;
-                if (last_subgroup_size && sb_id == sb_number - 1)
-                {
-                    current_sbs = last_subgroup_size;
-                }
-                else
-                {
-                    current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-                }
-                if (operation == BallotOp::ballot_bit_count
-                    || operation == BallotOp::ballot_inclusive_scan
-                    || operation == BallotOp::ballot_exclusive_scan)
-                {
-                    set_randomdata_for_subgroup<Ty>(t, wg_offset, current_sbs);
-                }
-                else if (operation == BallotOp::ballot_find_lsb
-                         || operation == BallotOp::ballot_find_msb)
-                {
-                    // Regarding to the spec, find lsb and find msb result is
-                    // undefined behavior if input value is zero, so generate
-                    // only non-zero values.
-                    for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                    {
-                        char x = (genrand_int32(gMTdata)) & 0xff;
-                        // undefined behaviour in case of 0;
-                        x = x ? x : 1;
-                        memset(&t[wg_offset + wi_id], x, sizeof(Ty));
-                    }
-                }
-                else
-                {
-                    log_error("Unknown operation...");
-                }
-            }
-            // Now map into work group using map from device
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            {
-                x[wi_id] = t[wi_id];
-            }
-            x += lws;
-            m += 4 * lws;
-        }
-    }
-    static bs128 getImportantBits(cl_uint sub_group_local_id,
-                                  cl_uint sub_group_size)
-    {
-        bs128 mask;
-        if (operation == BallotOp::ballot_bit_count
-            || operation == BallotOp::ballot_find_lsb
-            || operation == BallotOp::ballot_find_msb)
-        {
-            for (cl_uint i = 0; i < sub_group_size; ++i) mask.set(i);
-        }
-        else if (operation == BallotOp::ballot_inclusive_scan
-                 || operation == BallotOp::ballot_exclusive_scan)
-        {
-            for (cl_uint i = 0; i <= sub_group_local_id; ++i) mask.set(i);
-            if (operation == BallotOp::ballot_exclusive_scan)
-                mask.reset(sub_group_local_id);
-        }
-        return mask;
-    }
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int wi_id, wg_id, sb_id;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        int non_uniform_size = gws % lws;
-        int wg_number = gws / lws;
-        wg_number = non_uniform_size ? wg_number + 1 : wg_number;
-        cl_uint4 expected_result, device_result;
-        int last_subgroup_size = 0;
-        int current_sbs = 0;
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            if (non_uniform_size && wg_id == wg_number - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws,
-                                          last_subgroup_size);
-            }
-            // Map to array indexed to array indexed by local ID and sub group
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            { // inside the work_group
-                // read host inputs for work_group
-                mx[wi_id] = x[wi_id];
-                // read device outputs for work_group
-                my[wi_id] = y[wi_id];
-            }
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            { // for each subgroup
-                int wg_offset = sb_id * sbs;
-                if (last_subgroup_size && sb_id == sb_number - 1)
-                {
-                    current_sbs = last_subgroup_size;
-                }
-                else
-                {
-                    current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-                }
-                // Check result
-                expected_result = { 0, 0, 0, 0 };
-                for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                { // for subgroup element
-                    bs128 bs;
-                    // convert cl_uint4 input into std::bitset<128>
-                    bs |= bs128(mx[wg_offset + wi_id].s0)
-                        | (bs128(mx[wg_offset + wi_id].s1) << 32)
-                        | (bs128(mx[wg_offset + wi_id].s2) << 64)
-                        | (bs128(mx[wg_offset + wi_id].s3) << 96);
-                    bs &= getImportantBits(wi_id, current_sbs);
-                    device_result = my[wg_offset + wi_id];
-                    if (operation == BallotOp::ballot_inclusive_scan
-                        || operation == BallotOp::ballot_exclusive_scan
-                        || operation == BallotOp::ballot_bit_count)
-                    {
-                        expected_result.s0 = bs.count();
-                        if (!compare(device_result, expected_result))
-                        {
-                            log_error("ERROR: sub_group_%s "
-                                      "mismatch for local id %d in sub group "
-                                      "%d in group %d obtained {%d, %d, %d, "
-                                      "%d}, expected {%d, %d, %d, %d}\n",
-                                      operation_names(operation), wi_id, sb_id,
-                                      wg_id, device_result.s0, device_result.s1,
-                                      device_result.s2, device_result.s3,
-                                      expected_result.s0, expected_result.s1,
-                                      expected_result.s2, expected_result.s3);
-                            return TEST_FAIL;
-                        }
-                    }
-                    else if (operation == BallotOp::ballot_find_lsb)
-                    {
-                        for (int id = 0; id < current_sbs; ++id)
-                        {
-                            if (bs.test(id))
-                            {
-                                expected_result.s0 = id;
-                                break;
-                            }
-                        }
-                        if (!compare(device_result, expected_result))
-                        {
-                            log_error("ERROR: sub_group_ballot_find_lsb "
-                                      "mismatch for local id %d in sub group "
-                                      "%d in group %d obtained {%d, %d, %d, "
-                                      "%d}, expected {%d, %d, %d, %d}\n",
-                                      wi_id, sb_id, wg_id, device_result.s0,
-                                      device_result.s1, device_result.s2,
-                                      device_result.s3, expected_result.s0,
-                                      expected_result.s1, expected_result.s2,
-                                      expected_result.s3);
-                            return TEST_FAIL;
-                        }
-                    }
-                    else if (operation == BallotOp::ballot_find_msb)
-                    {
-                        for (int id = current_sbs - 1; id >= 0; --id)
-                        {
-                            if (bs.test(id))
-                            {
-                                expected_result.s0 = id;
-                                break;
-                            }
-                        }
-                        if (!compare(device_result, expected_result))
-                        {
-                            log_error("ERROR: sub_group_ballot_find_msb "
-                                      "mismatch for local id %d in sub group "
-                                      "%d in group %d obtained {%d, %d, %d, "
-                                      "%d}, expected {%d, %d, %d, %d}\n",
-                                      wi_id, sb_id, wg_id, device_result.s0,
-                                      device_result.s1, device_result.s2,
-                                      device_result.s3, expected_result.s0,
-                                      expected_result.s1, expected_result.s2,
-                                      expected_result.s3);
-                            return TEST_FAIL;
-                        }
-                    }
-                }
-            }
-            x += lws;
-            y += lws;
-            m += 4 * lws;
-        }
-        log_info("  sub_group_ballot_%s(%s)... passed\n",
-                 operation_names(operation), TypeManager<Ty>::name());
-        return TEST_PASS;
-    }
-// test mask functions
-template <typename Ty, BallotOp operation> struct SMASK
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int wi_id, wg_id, l, sb_id;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        int wg_number = gws / lws;
-        log_info("  get_sub_group_%s_mask...\n", operation_names(operation));
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            { // for each subgroup
-                int wg_offset = sb_id * sbs;
-                int current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-                // Produce expected masks for each work item in the subgroup
-                for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                {
-                    int midx = 4 * wg_offset + 4 * wi_id;
-                    cl_uint max_sub_group_size = m[midx + 2];
-                    cl_uint4 expected_mask = { 0 };
-                    expected_mask = generate_bit_mask(
-                        wi_id, operation_names(operation), max_sub_group_size);
-                    set_value(t[wg_offset + wi_id], expected_mask);
-                }
-            }
-            // Now map into work group using map from device
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            {
-                x[wi_id] = t[wi_id];
-            }
-            x += lws;
-            m += 4 * lws;
-        }
-    }
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int wi_id, wg_id, sb_id;
-        int gws = test_params.global_workgroup_size;
-        int lws = test_params.local_workgroup_size;
-        int sbs = test_params.subgroup_size;
-        int sb_number = (lws + sbs - 1) / sbs;
-        Ty expected_result, device_result;
-        int wg_number = gws / lws;
-        for (wg_id = 0; wg_id < wg_number; ++wg_id)
-        { // for each work_group
-            for (wi_id = 0; wi_id < lws; ++wi_id)
-            { // inside the work_group
-                mx[wi_id] = x[wi_id]; // read host inputs for work_group
-                my[wi_id] = y[wi_id]; // read device outputs for work_group
-            }
-            for (sb_id = 0; sb_id < sb_number; ++sb_id)
-            {
-                int wg_offset = sb_id * sbs;
-                int current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs;
-                // Check result
-                for (wi_id = 0; wi_id < current_sbs; ++wi_id)
-                { // inside the subgroup
-                    expected_result =
-                        mx[wg_offset + wi_id]; // read host input for subgroup
-                    device_result =
-                        my[wg_offset
-                           + wi_id]; // read device outputs for subgroup
-                    if (!compare(device_result, expected_result))
-                    {
-                        log_error("ERROR:  get_sub_group_%s_mask... mismatch "
-                                  "for local id %d in sub group %d in group "
-                                  "%d, obtained %d, expected %d\n",
-                                  operation_names(operation), wi_id, sb_id,
-                                  wg_id, device_result, expected_result);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-            x += lws;
-            y += lws;
-            m += 4 * lws;
-        }
-        log_info("  get_sub_group_%s_mask... passed\n",
-                 operation_names(operation));
-        return TEST_PASS;
-    }
-static const char *bcast_non_uniform_source =
-    "__kernel void test_bcast_non_uniform(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {\n"
-    "        out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].z);\n"
-    "    } else {\n"
-    "       out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].w);\n"
-    "    }\n"
-    "}\n";
-static const char *bcast_first_source =
-    "__kernel void test_bcast_first(const __global Type *in, __global int4 "
-    "*xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {\n"
-    "       out[gid] = sub_group_broadcast_first(x);\n"
-    "    } else {\n"
-    "       out[gid] = sub_group_broadcast_first(x);\n"
-    "    }\n"
-    "}\n";
-static const char *ballot_bit_count_source =
-    "__kernel void test_sub_group_ballot_bit_count(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint4 value = (uint4)(0,0,0,0);\n"
-    "    value = (uint4)(sub_group_ballot_bit_count(x),0,0,0);\n"
-    "    out[gid] = value;\n"
-    "}\n";
-static const char *ballot_inclusive_scan_source =
-    "__kernel void test_sub_group_ballot_inclusive_scan(const __global Type "
-    "*in, __global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint4 value = (uint4)(0,0,0,0);\n"
-    "    value = (uint4)(sub_group_ballot_inclusive_scan(x),0,0,0);\n"
-    "    out[gid] = value;\n"
-    "}\n";
-static const char *ballot_exclusive_scan_source =
-    "__kernel void test_sub_group_ballot_exclusive_scan(const __global Type "
-    "*in, __global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint4 value = (uint4)(0,0,0,0);\n"
-    "    value = (uint4)(sub_group_ballot_exclusive_scan(x),0,0,0);\n"
-    "    out[gid] = value;\n"
-    "}\n";
-static const char *ballot_find_lsb_source =
-    "__kernel void test_sub_group_ballot_find_lsb(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint4 value = (uint4)(0,0,0,0);\n"
-    "    value = (uint4)(sub_group_ballot_find_lsb(x),0,0,0);\n"
-    "    out[gid] = value;\n"
-    "}\n";
-static const char *ballot_find_msb_source =
-    "__kernel void test_sub_group_ballot_find_msb(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint4 value = (uint4)(0,0,0,0);"
-    "    value = (uint4)(sub_group_ballot_find_msb(x),0,0,0);"
-    "    out[gid] = value ;"
-    "}\n";
-static const char *get_subgroup_ge_mask_source =
-    "__kernel void test_get_sub_group_ge_mask(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].z = get_max_sub_group_size();\n"
-    "    Type x = in[gid];\n"
-    "    uint4 mask = get_sub_group_ge_mask();"
-    "    out[gid] = mask;\n"
-    "}\n";
-static const char *get_subgroup_gt_mask_source =
-    "__kernel void test_get_sub_group_gt_mask(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].z = get_max_sub_group_size();\n"
-    "    Type x = in[gid];\n"
-    "    uint4 mask = get_sub_group_gt_mask();"
-    "    out[gid] = mask;\n"
-    "}\n";
-static const char *get_subgroup_le_mask_source =
-    "__kernel void test_get_sub_group_le_mask(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].z = get_max_sub_group_size();\n"
-    "    Type x = in[gid];\n"
-    "    uint4 mask = get_sub_group_le_mask();"
-    "    out[gid] = mask;\n"
-    "}\n";
-static const char *get_subgroup_lt_mask_source =
-    "__kernel void test_get_sub_group_lt_mask(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].z = get_max_sub_group_size();\n"
-    "    Type x = in[gid];\n"
-    "    uint4 mask = get_sub_group_lt_mask();"
-    "    out[gid] = mask;\n"
-    "}\n";
-static const char *get_subgroup_eq_mask_source =
-    "__kernel void test_get_sub_group_eq_mask(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].z = get_max_sub_group_size();\n"
-    "    Type x = in[gid];\n"
-    "    uint4 mask = get_sub_group_eq_mask();"
-    "    out[gid] = mask;\n"
-    "}\n";
-static const char *ballot_source =
-    "__kernel void test_sub_group_ballot(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "uint4 full_ballot = sub_group_ballot(1);\n"
-    "uint divergence_mask;\n"
-    "uint4 partial_ballot;\n"
-    "uint gid = get_global_id(0);"
-    "XY(xy,gid);\n"
-    "if (get_sub_group_local_id() & 1) {\n"
-    "    divergence_mask = 0xaaaaaaaa;\n"
-    "    partial_ballot = sub_group_ballot(1);\n"
-    "} else {\n"
-    "    divergence_mask = 0x55555555;\n"
-    "    partial_ballot = sub_group_ballot(1);\n"
-    "}\n"
-    " size_t lws = get_local_size(0);\n"
-    "uint4 masked_ballot = full_ballot;\n"
-    "masked_ballot.x &= divergence_mask;\n"
-    "masked_ballot.y &= divergence_mask;\n"
-    "masked_ballot.z &= divergence_mask;\n"
-    "masked_ballot.w &= divergence_mask;\n"
-    "out[gid] = all(masked_ballot == partial_ballot);\n"
-    "} \n";
-static const char *ballot_source_inverse =
-    "__kernel void test_sub_group_ballot_inverse(const __global "
-    "Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint4 value = (uint4)(10,0,0,0);\n"
-    "    if (get_sub_group_local_id() & 1) {"
-    "        uint4 partial_ballot_mask = "
-    "        if (sub_group_inverse_ballot(partial_ballot_mask)) {\n"
-    "            value = (uint4)(1,0,0,1);\n"
-    "        } else {\n"
-    "            value = (uint4)(0,0,0,1);\n"
-    "        }\n"
-    "    } else {\n"
-    "       uint4 partial_ballot_mask = "
-    "(uint4)(0x55555555,0x55555555,0x55555555,0x55555555);"
-    "        if (sub_group_inverse_ballot(partial_ballot_mask)) {\n"
-    "            value = (uint4)(1,0,0,2);\n"
-    "        } else {\n"
-    "            value = (uint4)(0,0,0,2);\n"
-    "        }\n"
-    "    }\n"
-    "    out[gid] = value;\n"
-    "}\n";
-static const char *ballot_bit_extract_source =
-    "__kernel void test_sub_group_ballot_bit_extract(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    uint index = xy[gid].z;\n"
-    "    uint4 value = (uint4)(10,0,0,0);\n"
-    "    if (get_sub_group_local_id() & 1) {"
-    "       if (sub_group_ballot_bit_extract(x, xy[gid].z)) {\n"
-    "           value = (uint4)(1,0,0,1);\n"
-    "       } else {\n"
-    "           value = (uint4)(0,0,0,1);\n"
-    "       }\n"
-    "    } else {\n"
-    "       if (sub_group_ballot_bit_extract(x, xy[gid].w)) {\n"
-    "           value = (uint4)(1,0,0,2);\n"
-    "       } else {\n"
-    "           value = (uint4)(0,0,0,2);\n"
-    "       }\n"
-    "    }\n"
-    "    out[gid] = value;\n"
-    "}\n";
-template <typename T> int run_non_uniform_broadcast_for_type(RunTestForType rft)
-    int error =
-        rft.run_impl<T, BC<T, SubgroupsBroadcastOp::non_uniform_broadcast>>(
-            "test_bcast_non_uniform", bcast_non_uniform_source);
-    return error;
-int test_subgroup_functions_ballot(cl_device_id device, cl_context context,
-                                   cl_command_queue queue, int num_elements)
-    std::vector<std::string> required_extensions = { "cl_khr_subgroup_ballot" };
-    constexpr size_t global_work_size = 170;
-    constexpr size_t local_work_size = 64;
-    WorkGroupParams test_params(global_work_size, local_work_size,
-                                required_extensions);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-    // non uniform broadcast functions
-    int error = run_non_uniform_broadcast_for_type<cl_int>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_int2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_int3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_int4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_int8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_int16>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uint>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uint2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_uint3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uint4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uint8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uint16>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_char>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_char2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_char3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_char4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_char8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_char16>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uchar>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uchar2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_uchar3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uchar4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uchar8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_uchar16>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_short>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_short2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_short3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_short4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_short8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_short16>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ushort>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ushort2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_ushort3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ushort4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ushort8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ushort16>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_long>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_long2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_long3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_long4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_long8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_long16>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ulong>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ulong2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_ulong3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ulong4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ulong8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_ulong16>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_float>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_float2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_float3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_float4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_float8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_float16>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_double>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_double2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_double3>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_double4>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_double8>(rft);
-    error |= run_non_uniform_broadcast_for_type<cl_double16>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_half>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_half2>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_half3>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_half4>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_half8>(rft);
-    error |= run_non_uniform_broadcast_for_type<subgroups::cl_half16>(rft);
-    // broadcast first functions
-    error |=
-        rft.run_impl<cl_int, BC<cl_int, SubgroupsBroadcastOp::broadcast_first>>(
-            "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_uint,
-                          BC<cl_uint, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_long,
-                          BC<cl_long, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_ulong,
-                          BC<cl_ulong, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_short,
-                          BC<cl_short, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_ushort,
-                          BC<cl_ushort, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_char,
-                          BC<cl_char, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_uchar,
-                          BC<cl_uchar, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_float,
-                          BC<cl_float, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<cl_double,
-                          BC<cl_double, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    error |= rft.run_impl<
-        subgroups::cl_half,
-        BC<subgroups::cl_half, SubgroupsBroadcastOp::broadcast_first>>(
-        "test_bcast_first", bcast_first_source);
-    // mask functions
-    error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::eq_mask>>(
-        "test_get_sub_group_eq_mask", get_subgroup_eq_mask_source);
-    error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::ge_mask>>(
-        "test_get_sub_group_ge_mask", get_subgroup_ge_mask_source);
-    error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::gt_mask>>(
-        "test_get_sub_group_gt_mask", get_subgroup_gt_mask_source);
-    error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::le_mask>>(
-        "test_get_sub_group_le_mask", get_subgroup_le_mask_source);
-    error |= rft.run_impl<cl_uint4, SMASK<cl_uint4, BallotOp::lt_mask>>(
-        "test_get_sub_group_lt_mask", get_subgroup_lt_mask_source);
-    // ballot functions
-    error |= rft.run_impl<cl_uint, BALLOT<cl_uint>>("test_sub_group_ballot",
-                                                    ballot_source);
-    error |= rft.run_impl<cl_uint4,
-                          BALLOT_INVERSE<cl_uint4, BallotOp::inverse_ballot>>(
-        "test_sub_group_ballot_inverse", ballot_source_inverse);
-    error |= rft.run_impl<
-        cl_uint4, BALLOT_BIT_EXTRACT<cl_uint4, BallotOp::ballot_bit_extract>>(
-        "test_sub_group_ballot_bit_extract", ballot_bit_extract_source);
-    error |= rft.run_impl<
-        cl_uint4, BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_bit_count>>(
-        "test_sub_group_ballot_bit_count", ballot_bit_count_source);
-    error |= rft.run_impl<
-        cl_uint4,
-        BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_inclusive_scan>>(
-        "test_sub_group_ballot_inclusive_scan", ballot_inclusive_scan_source);
-    error |= rft.run_impl<
-        cl_uint4,
-        BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_exclusive_scan>>(
-        "test_sub_group_ballot_exclusive_scan", ballot_exclusive_scan_source);
-    error |= rft.run_impl<
-        cl_uint4, BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_find_lsb>>(
-        "test_sub_group_ballot_find_lsb", ballot_find_lsb_source);
-    error |= rft.run_impl<
-        cl_uint4, BALLOT_COUNT_SCAN_FIND<cl_uint4, BallotOp::ballot_find_msb>>(
-        "test_sub_group_ballot_find_msb", ballot_find_msb_source);
-    return error;
diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp
deleted file mode 100644
index 588e9ce..0000000
--- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp
+++ /dev/null
@@ -1,340 +0,0 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "procs.h"
-#include "subhelpers.h"
-#include "subgroup_common_templates.h"
-#include "harness/typeWrappers.h"
-#define CLUSTER_SIZE 4
-#define CLUSTER_SIZE_STR "4"
-namespace {
-static const char *redadd_clustered_source =
-    "__kernel void test_redadd_clustered(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR
-    ");\n"
-    "}\n";
-static const char *redmax_clustered_source =
-    "__kernel void test_redmax_clustered(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR
-    ");\n"
-    "}\n";
-static const char *redmin_clustered_source =
-    "__kernel void test_redmin_clustered(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR
-    ");\n"
-    "}\n";
-static const char *redmul_clustered_source =
-    "__kernel void test_redmul_clustered(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR
-    ");\n"
-    "}\n";
-static const char *redand_clustered_source =
-    "__kernel void test_redand_clustered(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR
-    ");\n"
-    "}\n";
-static const char *redor_clustered_source =
-    "__kernel void test_redor_clustered(const __global Type *in, __global int4 "
-    "*xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR
-    ");\n"
-    "}\n";
-static const char *redxor_clustered_source =
-    "__kernel void test_redxor_clustered(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_xor(in[gid], " CLUSTER_SIZE_STR ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = sub_group_clustered_reduce_xor(in[gid], " CLUSTER_SIZE_STR
-    ");\n"
-    "}\n";
-static const char *redand_clustered_logical_source =
-    "__kernel void test_redand_clustered_logical(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR
-    ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = "
-    "sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR ");\n"
-    "}\n";
-static const char *redor_clustered_logical_source =
-    "__kernel void test_redor_clustered_logical(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if (sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR
-    ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = "
-    "sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR ");\n"
-    "}\n";
-static const char *redxor_clustered_logical_source =
-    "__kernel void test_redxor_clustered_logical(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    xy[gid].w = 0;\n"
-    "    if ( sizeof(in[gid]) == "
-    "sizeof(sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR
-    ")))\n"
-    "    {xy[gid].w = sizeof(in[gid]);}\n"
-    "    out[gid] = "
-    "sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR ");\n"
-    "}\n";
-// Test for reduce cluster functions
-template <typename Ty, ArithmeticOp operation> struct RED_CLU
-    static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        ng = ng / nw;
-        log_info("  sub_group_clustered_reduce_%s(%s, %d bytes) ...\n",
-                 operation_names(operation), TypeManager<Ty>::name(),
-                 sizeof(Ty));
-        genrand<Ty, operation>(x, t, m, ns, nw, ng);
-    }
-    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        int nj = (nw + ns - 1) / ns;
-        ng = ng / nw;
-        for (int k = 0; k < ng; ++k)
-        {
-            std::vector<cl_int> data_type_sizes;
-            // Map to array indexed to array indexed by local ID and sub group
-            for (int j = 0; j < nw; ++j)
-            {
-                mx[j] = x[j];
-                my[j] = y[j];
-                data_type_sizes.push_back(m[4 * j + 3]);
-            }
-            for (cl_int dts : data_type_sizes)
-            {
-                if (dts != sizeof(Ty))
-                {
-                    log_error("ERROR: sub_group_clustered_reduce_%s(%s) "
-                              "wrong data type size detected, expected: %d, "
-                              "used by device %d, in group %d\n",
-                              operation_names(operation),
-                              TypeManager<Ty>::name(), sizeof(Ty), dts, k);
-                    return TEST_FAIL;
-                }
-            }
-            for (int j = 0; j < nj; ++j)
-            {
-                int ii = j * ns;
-                int n = ii + ns > nw ? nw - ii : ns;
-                int midx = 4 * ii + 2;
-                std::vector<Ty> clusters_results;
-                int clusters_counter = ns / CLUSTER_SIZE;
-                clusters_results.resize(clusters_counter);
-                // Compute target
-                Ty tr = mx[ii];
-                for (int i = 0; i < n; ++i)
-                {
-                    if (i % CLUSTER_SIZE == 0)
-                        tr = mx[ii + i];
-                    else
-                        tr = calculate<Ty>(tr, mx[ii + i], operation);
-                    clusters_results[i / CLUSTER_SIZE] = tr;
-                }
-                // Check result
-                for (int i = 0; i < n; ++i)
-                {
-                    Ty rr = my[ii + i];
-                    tr = clusters_results[i / CLUSTER_SIZE];
-                    if (!compare(rr, tr))
-                    {
-                        log_error(
-                            "ERROR: sub_group_clustered_reduce_%s(%s) mismatch "
-                            "for local id %d in sub group %d in group %d\n",
-                            operation_names(operation), TypeManager<Ty>::name(),
-                            i, j, k);
-                        return TEST_FAIL;
-                    }
-                }
-            }
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-        log_info("  sub_group_clustered_reduce_%s(%s, %d bytes) ... passed\n",
-                 operation_names(operation), TypeManager<Ty>::name(),
-                 sizeof(Ty));
-        return TEST_PASS;
-    }
-template <typename T>
-int run_cluster_red_add_max_min_mul_for_type(RunTestForType rft)
-    int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::add_>>(
-        "test_redadd_clustered", redadd_clustered_source);
-    error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::max_>>(
-        "test_redmax_clustered", redmax_clustered_source);
-    error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::min_>>(
-        "test_redmin_clustered", redmin_clustered_source);
-    error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::mul_>>(
-        "test_redmul_clustered", redmul_clustered_source);
-    return error;
-template <typename T> int run_cluster_and_or_xor_for_type(RunTestForType rft)
-    int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::and_>>(
-        "test_redand_clustered", redand_clustered_source);
-    error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::or_>>(
-        "test_redor_clustered", redor_clustered_source);
-    error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::xor_>>(
-        "test_redxor_clustered", redxor_clustered_source);
-    return error;
-template <typename T>
-int run_cluster_logical_and_or_xor_for_type(RunTestForType rft)
-    int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_and>>(
-        "test_redand_clustered_logical", redand_clustered_logical_source);
-    error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_or>>(
-        "test_redor_clustered_logical", redor_clustered_logical_source);
-    error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_xor>>(
-        "test_redxor_clustered_logical", redxor_clustered_logical_source);
-    return error;
-int test_subgroup_functions_clustered_reduce(cl_device_id device,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements)
-    std::vector<std::string> required_extensions = {
-        "cl_khr_subgroup_clustered_reduce"
-    };
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size,
-                                required_extensions);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-    int error = run_cluster_red_add_max_min_mul_for_type<cl_int>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_uint>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_long>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_ulong>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_short>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_ushort>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_char>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_uchar>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_float>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<cl_double>(rft);
-    error |= run_cluster_red_add_max_min_mul_for_type<subgroups::cl_half>(rft);
-    error |= run_cluster_and_or_xor_for_type<cl_int>(rft);
-    error |= run_cluster_and_or_xor_for_type<cl_uint>(rft);
-    error |= run_cluster_and_or_xor_for_type<cl_long>(rft);
-    error |= run_cluster_and_or_xor_for_type<cl_ulong>(rft);
-    error |= run_cluster_and_or_xor_for_type<cl_short>(rft);
-    error |= run_cluster_and_or_xor_for_type<cl_ushort>(rft);
-    error |= run_cluster_and_or_xor_for_type<cl_char>(rft);
-    error |= run_cluster_and_or_xor_for_type<cl_uchar>(rft);
-    error |= run_cluster_logical_and_or_xor_for_type<cl_int>(rft);
-    return error;
diff --git a/test_conformance/subgroups/test_subgroup_extended_types.cpp b/test_conformance/subgroups/test_subgroup_extended_types.cpp
deleted file mode 100644
index 98401b8..0000000
--- a/test_conformance/subgroups/test_subgroup_extended_types.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "procs.h"
-#include "subhelpers.h"
-#include "subgroup_common_kernels.h"
-#include "subgroup_common_templates.h"
-#include "harness/typeWrappers.h"
-namespace {
-template <typename T> int run_broadcast_for_extended_type(RunTestForType rft)
-    int error = rft.run_impl<T, BC<T, SubgroupsBroadcastOp::broadcast>>(
-        "test_bcast", bcast_source);
-    return error;
-template <typename T> int run_scan_reduction_for_type(RunTestForType rft)
-    int error = rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>("test_redadd",
-                                                               redadd_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>("test_redmax",
-                                                            redmax_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>("test_redmin",
-                                                            redmin_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>("test_scinadd",
-                                                             scinadd_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>("test_scinmax",
-                                                             scinmax_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>("test_scinmin",
-                                                             scinmin_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>("test_scexadd",
-                                                             scexadd_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>("test_scexmax",
-                                                             scexmax_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>("test_scexmin",
-                                                             scexmin_source);
-    return error;
-int test_subgroup_functions_extended_types(cl_device_id device,
-                                           cl_context context,
-                                           cl_command_queue queue,
-                                           int num_elements)
-    std::vector<std::string> required_extensions = {
-        "cl_khr_subgroup_extended_types"
-    };
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size,
-                                required_extensions);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-    int error = run_broadcast_for_extended_type<cl_uint2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_uint3>(rft);
-    error |= run_broadcast_for_extended_type<cl_uint4>(rft);
-    error |= run_broadcast_for_extended_type<cl_uint8>(rft);
-    error |= run_broadcast_for_extended_type<cl_uint16>(rft);
-    error |= run_broadcast_for_extended_type<cl_int2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_int3>(rft);
-    error |= run_broadcast_for_extended_type<cl_int4>(rft);
-    error |= run_broadcast_for_extended_type<cl_int8>(rft);
-    error |= run_broadcast_for_extended_type<cl_int16>(rft);
-    error |= run_broadcast_for_extended_type<cl_ulong2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_ulong3>(rft);
-    error |= run_broadcast_for_extended_type<cl_ulong4>(rft);
-    error |= run_broadcast_for_extended_type<cl_ulong8>(rft);
-    error |= run_broadcast_for_extended_type<cl_ulong16>(rft);
-    error |= run_broadcast_for_extended_type<cl_long2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_long3>(rft);
-    error |= run_broadcast_for_extended_type<cl_long4>(rft);
-    error |= run_broadcast_for_extended_type<cl_long8>(rft);
-    error |= run_broadcast_for_extended_type<cl_long16>(rft);
-    error |= run_broadcast_for_extended_type<cl_float2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_float3>(rft);
-    error |= run_broadcast_for_extended_type<cl_float4>(rft);
-    error |= run_broadcast_for_extended_type<cl_float8>(rft);
-    error |= run_broadcast_for_extended_type<cl_float16>(rft);
-    error |= run_broadcast_for_extended_type<cl_double2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_double3>(rft);
-    error |= run_broadcast_for_extended_type<cl_double4>(rft);
-    error |= run_broadcast_for_extended_type<cl_double8>(rft);
-    error |= run_broadcast_for_extended_type<cl_double16>(rft);
-    error |= run_broadcast_for_extended_type<cl_ushort2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_ushort3>(rft);
-    error |= run_broadcast_for_extended_type<cl_ushort4>(rft);
-    error |= run_broadcast_for_extended_type<cl_ushort8>(rft);
-    error |= run_broadcast_for_extended_type<cl_ushort16>(rft);
-    error |= run_broadcast_for_extended_type<cl_short2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_short3>(rft);
-    error |= run_broadcast_for_extended_type<cl_short4>(rft);
-    error |= run_broadcast_for_extended_type<cl_short8>(rft);
-    error |= run_broadcast_for_extended_type<cl_short16>(rft);
-    error |= run_broadcast_for_extended_type<cl_uchar2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_uchar3>(rft);
-    error |= run_broadcast_for_extended_type<cl_uchar4>(rft);
-    error |= run_broadcast_for_extended_type<cl_uchar8>(rft);
-    error |= run_broadcast_for_extended_type<cl_uchar16>(rft);
-    error |= run_broadcast_for_extended_type<cl_char2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_char3>(rft);
-    error |= run_broadcast_for_extended_type<cl_char4>(rft);
-    error |= run_broadcast_for_extended_type<cl_char8>(rft);
-    error |= run_broadcast_for_extended_type<cl_char16>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_half2>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_half3>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_half4>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_half8>(rft);
-    error |= run_broadcast_for_extended_type<subgroups::cl_half16>(rft);
-    error |= run_scan_reduction_for_type<cl_uchar>(rft);
-    error |= run_scan_reduction_for_type<cl_char>(rft);
-    error |= run_scan_reduction_for_type<cl_ushort>(rft);
-    error |= run_scan_reduction_for_type<cl_short>(rft);
-    return error;
diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp
deleted file mode 100644
index eb46ff0..0000000
--- a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp
+++ /dev/null
@@ -1,473 +0,0 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "procs.h"
-#include "subhelpers.h"
-#include "harness/typeWrappers.h"
-#include "subgroup_common_templates.h"
-namespace {
-static const char *scinadd_non_uniform_source = R"(
-    __kernel void test_scinadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_add(in[gid]);
-            }
-    }
-static const char *scinmax_non_uniform_source = R"(
-    __kernel void test_scinmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_max(in[gid]);
-            }
-    }
-static const char *scinmin_non_uniform_source = R"(
-    __kernel void test_scinmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_min(in[gid]);
-            }
-    }
-static const char *scinmul_non_uniform_source = R"(
-    __kernel void test_scinmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_mul(in[gid]);
-            }
-    }
-static const char *scinand_non_uniform_source = R"(
-    __kernel void test_scinand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_and(in[gid]);
-            }
-    }
-static const char *scinor_non_uniform_source = R"(
-    __kernel void test_scinor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_or(in[gid]);
-            }
-    }
-static const char *scinxor_non_uniform_source = R"(
-    __kernel void test_scinxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_xor(in[gid]);
-            }
-    }
-static const char *scinand_non_uniform_logical_source = R"(
-    __kernel void test_scinand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_logical_and(in[gid]);
-            }
-    }
-static const char *scinor_non_uniform_logical_source = R"(
-    __kernel void test_scinor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_logical_or(in[gid]);
-            }
-    }
-static const char *scinxor_non_uniform_logical_source = R"(
-    __kernel void test_scinxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_inclusive_logical_xor(in[gid]);
-            }
-    }
-static const char *scexadd_non_uniform_source = R"(
-    __kernel void test_scexadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_add(in[gid]);
-            }
-    }
-static const char *scexmax_non_uniform_source = R"(
-    __kernel void test_scexmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_max(in[gid]);
-            }
-    }
-static const char *scexmin_non_uniform_source = R"(
-    __kernel void test_scexmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_min(in[gid]);
-            }
-    }
-static const char *scexmul_non_uniform_source = R"(
-    __kernel void test_scexmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_mul(in[gid]);
-            }
-    }
-static const char *scexand_non_uniform_source = R"(
-    __kernel void test_scexand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_and(in[gid]);
-            }
-    }
-static const char *scexor_non_uniform_source = R"(
-    __kernel void test_scexor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_or(in[gid]);
-            }
-    }
-static const char *scexxor_non_uniform_source = R"(
-    __kernel void test_scexxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_xor(in[gid]);
-            }
-    }
-static const char *scexand_non_uniform_logical_source = R"(
-    __kernel void test_scexand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_logical_and(in[gid]);
-            }
-    }
-static const char *scexor_non_uniform_logical_source = R"(
-    __kernel void test_scexor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_logical_or(in[gid]);
-            }
-    }
-static const char *scexxor_non_uniform_logical_source = R"(
-    __kernel void test_scexxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_scan_exclusive_logical_xor(in[gid]);
-            }
-    }
-static const char *redadd_non_uniform_source = R"(
-    __kernel void test_redadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_add(in[gid]);
-            }
-    }
-static const char *redmax_non_uniform_source = R"(
-    __kernel void test_redmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_max(in[gid]);
-            }
-    }
-static const char *redmin_non_uniform_source = R"(
-    __kernel void test_redmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_min(in[gid]);
-            }
-    }
-static const char *redmul_non_uniform_source = R"(
-    __kernel void test_redmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_mul(in[gid]);
-            }
-    }
-static const char *redand_non_uniform_source = R"(
-    __kernel void test_redand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_and(in[gid]);
-            }
-    }
-static const char *redor_non_uniform_source = R"(
-    __kernel void test_redor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_or(in[gid]);
-            }
-    }
-static const char *redxor_non_uniform_source = R"(
-    __kernel void test_redxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_xor(in[gid]);
-            }
-    }
-static const char *redand_non_uniform_logical_source = R"(
-    __kernel void test_redand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_logical_and(in[gid]);
-            }
-    }
-static const char *redor_non_uniform_logical_source = R"(
-    __kernel void test_redor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_logical_or(in[gid]);
-            }
-    }
-static const char *redxor_non_uniform_logical_source = R"(
-    __kernel void test_redxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        int elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_reduce_logical_xor(in[gid]);
-            }
-    }
-template <typename T>
-int run_functions_add_mul_max_min_for_type(RunTestForType rft)
-    int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>(
-        "test_scinadd_non_uniform", scinadd_non_uniform_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::mul_>>(
-        "test_scinmul_non_uniform", scinmul_non_uniform_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>(
-        "test_scinmax_non_uniform", scinmax_non_uniform_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>(
-        "test_scinmin_non_uniform", scinmin_non_uniform_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>(
-        "test_scexadd_non_uniform", scexadd_non_uniform_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::mul_>>(
-        "test_scexmul_non_uniform", scexmul_non_uniform_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>(
-        "test_scexmax_non_uniform", scexmax_non_uniform_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>(
-        "test_scexmin_non_uniform", scexmin_non_uniform_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>(
-        "test_redadd_non_uniform", redadd_non_uniform_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::mul_>>(
-        "test_redmul_non_uniform", redmul_non_uniform_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>(
-        "test_redmax_non_uniform", redmax_non_uniform_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>(
-        "test_redmin_non_uniform", redmin_non_uniform_source);
-    return error;
-template <typename T> int run_functions_and_or_xor_for_type(RunTestForType rft)
-    int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::and_>>(
-        "test_scinand_non_uniform", scinand_non_uniform_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::or_>>(
-        "test_scinor_non_uniform", scinor_non_uniform_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::xor_>>(
-        "test_scinxor_non_uniform", scinxor_non_uniform_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::and_>>(
-        "test_scexand_non_uniform", scexand_non_uniform_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::or_>>(
-        "test_scexor_non_uniform", scexor_non_uniform_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::xor_>>(
-        "test_scexxor_non_uniform", scexxor_non_uniform_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::and_>>(
-        "test_redand_non_uniform", redand_non_uniform_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::or_>>(
-        "test_redor_non_uniform", redor_non_uniform_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::xor_>>(
-        "test_redxor_non_uniform", redxor_non_uniform_source);
-    return error;
-template <typename T>
-int run_functions_logical_and_or_xor_for_type(RunTestForType rft)
-    int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_and>>(
-        "test_scinand_non_uniform_logical", scinand_non_uniform_logical_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_or>>(
-        "test_scinor_non_uniform_logical", scinor_non_uniform_logical_source);
-    error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_xor>>(
-        "test_scinxor_non_uniform_logical", scinxor_non_uniform_logical_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_and>>(
-        "test_scexand_non_uniform_logical", scexand_non_uniform_logical_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_or>>(
-        "test_scexor_non_uniform_logical", scexor_non_uniform_logical_source);
-    error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_xor>>(
-        "test_scexxor_non_uniform_logical", scexxor_non_uniform_logical_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_and>>(
-        "test_redand_non_uniform_logical", redand_non_uniform_logical_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_or>>(
-        "test_redor_non_uniform_logical", redor_non_uniform_logical_source);
-    error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_xor>>(
-        "test_redxor_non_uniform_logical", redxor_non_uniform_logical_source);
-    return error;
-int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device,
-                                                   cl_context context,
-                                                   cl_command_queue queue,
-                                                   int num_elements)
-    std::vector<std::string> required_extensions = {
-        "cl_khr_subgroup_non_uniform_arithmetic"
-    };
-    std::vector<uint32_t> masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555,
-                                 0x0f0ff0f0, 0x0f0f0f0f, 0xff0000ff, 0xff00ff00,
-                                 0x00ffff00, 0x80000000, 0xaaaaaaaa };
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size,
-                                required_extensions, masks);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-    int error = run_functions_add_mul_max_min_for_type<cl_int>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_uint>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_long>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_ulong>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_short>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_ushort>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_char>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_uchar>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_float>(rft);
-    error |= run_functions_add_mul_max_min_for_type<cl_double>(rft);
-    error |= run_functions_add_mul_max_min_for_type<subgroups::cl_half>(rft);
-    error |= run_functions_and_or_xor_for_type<cl_int>(rft);
-    error |= run_functions_and_or_xor_for_type<cl_uint>(rft);
-    error |= run_functions_and_or_xor_for_type<cl_long>(rft);
-    error |= run_functions_and_or_xor_for_type<cl_ulong>(rft);
-    error |= run_functions_and_or_xor_for_type<cl_short>(rft);
-    error |= run_functions_and_or_xor_for_type<cl_ushort>(rft);
-    error |= run_functions_and_or_xor_for_type<cl_char>(rft);
-    error |= run_functions_and_or_xor_for_type<cl_uchar>(rft);
-    error |= run_functions_logical_and_or_xor_for_type<cl_int>(rft);
-    return error;
\ No newline at end of file
diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp
deleted file mode 100644
index 2b00b4d..0000000
--- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp
+++ /dev/null
@@ -1,303 +0,0 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "procs.h"
-#include "subhelpers.h"
-#include "harness/typeWrappers.h"
-#include <set>
-namespace {
-template <typename T, NonUniformVoteOp operation> struct VOTE
-    static void gen(T *x, T *t, cl_int *m, const WorkGroupParams &test_params)
-    {
-        int i, ii, j, k, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        int nj = (nw + ns - 1) / ns;
-        int non_uniform_size = ng % nw;
-        ng = ng / nw;
-        int last_subgroup_size = 0;
-        ii = 0;
-        log_info("  sub_group_%s%s... \n",
-                 (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_",
-                 operation_names(operation));
-        log_info("  test params: global size = %d local size = %d subgroups "
-                 "size = %d work item mask = 0x%x data type (%s)\n",
-                 test_params.global_workgroup_size, nw, ns, work_items_mask,
-                 TypeManager<T>::name());
-        if (non_uniform_size)
-        {
-            log_info("  non uniform work group size mode ON\n");
-        }
-        if (operation == NonUniformVoteOp::elect) return;
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            if (non_uniform_size && k == ng - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, nj, ns, nw,
-                                          last_subgroup_size);
-            }
-            for (j = 0; j < nj; ++j)
-            { // for each subgroup
-                ii = j * ns;
-                if (last_subgroup_size && j == nj - 1)
-                {
-                    n = last_subgroup_size;
-                }
-                else
-                {
-                    n = ii + ns > nw ? nw - ii : ns;
-                }
-                int e = genrand_int32(gMTdata) % 3;
-                for (i = 0; i < n; i++)
-                {
-                    if (e == 2)
-                    { // set once 0 and once 1 alternately
-                        int value = i % 2;
-                        set_value(t[ii + i], value);
-                    }
-                    else
-                    { // set 0/1 for all work items in subgroup
-                        set_value(t[ii + i], e);
-                    }
-                }
-            }
-            // Now map into work group using map from device
-            for (j = 0; j < nw; ++j)
-            {
-                x[j] = t[j];
-            }
-            x += nw;
-            m += 4 * nw;
-        }
-    }
-    static int chk(T *x, T *y, T *mx, T *my, cl_int *m,
-                   const WorkGroupParams &test_params)
-    {
-        int ii, i, j, k, n;
-        int nw = test_params.local_workgroup_size;
-        int ns = test_params.subgroup_size;
-        int ng = test_params.global_workgroup_size;
-        uint32_t work_items_mask = test_params.work_items_mask;
-        int nj = (nw + ns - 1) / ns;
-        cl_int tr, rr;
-        int non_uniform_size = ng % nw;
-        ng = ng / nw;
-        if (non_uniform_size) ng++;
-        int last_subgroup_size = 0;
-        for (k = 0; k < ng; ++k)
-        { // for each work_group
-            if (non_uniform_size && k == ng - 1)
-            {
-                set_last_workgroup_params(non_uniform_size, nj, ns, nw,
-                                          last_subgroup_size);
-            }
-            for (j = 0; j < nw; ++j)
-            { // inside the work_group
-                mx[j] = x[j]; // read host inputs for work_group
-                my[j] = y[j]; // read device outputs for work_group
-            }
-            for (j = 0; j < nj; ++j)
-            { // for each subgroup
-                ii = j * ns;
-                if (last_subgroup_size && j == nj - 1)
-                {
-                    n = last_subgroup_size;
-                }
-                else
-                {
-                    n = ii + ns > nw ? nw - ii : ns;
-                }
-                rr = 0;
-                if (operation == NonUniformVoteOp::all
-                    || operation == NonUniformVoteOp::all_equal)
-                    tr = 1;
-                if (operation == NonUniformVoteOp::any) tr = 0;
-                std::set<int> active_work_items;
-                for (i = 0; i < n; ++i)
-                {
-                    uint32_t check_work_item = 1 << (i % 32);
-                    if (work_items_mask & check_work_item)
-                    {
-                        active_work_items.insert(i);
-                        switch (operation)
-                        {
-                            case NonUniformVoteOp::elect: break;
-                            case NonUniformVoteOp::all:
-                                tr &=
-                                    !compare_ordered<T>(mx[ii + i], 0) ? 1 : 0;
-                                break;
-                            case NonUniformVoteOp::any:
-                                tr |=
-                                    !compare_ordered<T>(mx[ii + i], 0) ? 1 : 0;
-                                break;
-                            case NonUniformVoteOp::all_equal:
-                                tr &= compare_ordered<T>(
-                                          mx[ii + i],
-                                          mx[ii + *active_work_items.begin()])
-                                    ? 1
-                                    : 0;
-                                break;
-                            default:
-                                log_error("Unknown operation\n");
-                                return TEST_FAIL;
-                        }
-                    }
-                }
-                if (active_work_items.empty())
-                {
-                    log_info("  no one workitem acitve... in workgroup id = %d "
-                             "subgroup id = %d\n",
-                             k, j);
-                }
-                else
-                {
-                    auto lowest_active = active_work_items.begin();
-                    for (const int &active_work_item : active_work_items)
-                    {
-                        i = active_work_item;
-                        if (operation == NonUniformVoteOp::elect)
-                        {
-                            i == *lowest_active ? tr = 1 : tr = 0;
-                        }
-                        // normalize device values on host, non zero set 1.
-                        rr = compare_ordered<T>(my[ii + i], 0) ? 0 : 1;
-                        if (rr != tr)
-                        {
-                            log_error("ERROR: sub_group_%s() \n",
-                                      operation_names(operation));
-                            log_error(
-                                "mismatch for work item %d sub group %d in "
-                                "work group %d. Expected: %d Obtained: %d\n",
-                                i, j, k, tr, rr);
-                            return TEST_FAIL;
-                        }
-                    }
-                }
-            }
-            x += nw;
-            y += nw;
-            m += 4 * nw;
-        }
-        log_info("  sub_group_%s%s... passed\n",
-                 (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_",
-                 operation_names(operation));
-        return TEST_PASS;
-    }
-static const char *elect_source = R"(
-    __kernel void test_elect(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_elect();
-            }
-    }
-static const char *non_uniform_any_source = R"(
-    __kernel void test_non_uniform_any(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_any(in[gid]);
-            }
-    }
-static const char *non_uniform_all_source = R"(
-    __kernel void test_non_uniform_all(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_all(in[gid]);
-            }
-    }
-static const char *non_uniform_all_equal_source = R"(
-    __kernel void test_non_uniform_all_equal(const __global Type *in, __global int4 *xy, __global Type *out) {
-        int gid = get_global_id(0);
-        XY(xy,gid);
-        uint elect_work_item = 1 << (get_sub_group_local_id() % 32);
-            if (elect_work_item & WORK_ITEMS_MASK){
-                out[gid] = sub_group_non_uniform_all_equal(in[gid]);
-            }
-    }
-template <typename T> int run_vote_all_equal_for_type(RunTestForType rft)
-    int error = rft.run_impl<T, VOTE<T, NonUniformVoteOp::all_equal>>(
-        "test_non_uniform_all_equal", non_uniform_all_equal_source);
-    return error;
-int test_subgroup_functions_non_uniform_vote(cl_device_id device,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements)
-    std::vector<std::string> required_extensions = {
-        "cl_khr_subgroup_non_uniform_vote"
-    };
-    std::vector<uint32_t> masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555,
-                                 0x0f0ff0f0, 0x0f0f0f0f, 0xff0000ff, 0xff00ff00,
-                                 0x00ffff00, 0x80000000 };
-    constexpr size_t global_work_size = 170;
-    constexpr size_t local_work_size = 64;
-    WorkGroupParams test_params(global_work_size, local_work_size,
-                                required_extensions, masks);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-    int error = run_vote_all_equal_for_type<cl_int>(rft);
-    error |= run_vote_all_equal_for_type<cl_uint>(rft);
-    error |= run_vote_all_equal_for_type<cl_long>(rft);
-    error |= run_vote_all_equal_for_type<cl_ulong>(rft);
-    error |= run_vote_all_equal_for_type<cl_float>(rft);
-    error |= run_vote_all_equal_for_type<cl_double>(rft);
-    error |= run_vote_all_equal_for_type<subgroups::cl_half>(rft);
-    error |= rft.run_impl<cl_int, VOTE<cl_int, NonUniformVoteOp::all>>(
-        "test_non_uniform_all", non_uniform_all_source);
-    error |= rft.run_impl<cl_int, VOTE<cl_int, NonUniformVoteOp::elect>>(
-        "test_elect", elect_source);
-    error |= rft.run_impl<cl_int, VOTE<cl_int, NonUniformVoteOp::any>>(
-        "test_non_uniform_any", non_uniform_any_source);
-    return error;
diff --git a/test_conformance/subgroups/test_subgroup_shuffle.cpp b/test_conformance/subgroups/test_subgroup_shuffle.cpp
deleted file mode 100644
index 049f098..0000000
--- a/test_conformance/subgroups/test_subgroup_shuffle.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "procs.h"
-#include "subhelpers.h"
-#include "subgroup_common_templates.h"
-#include "harness/typeWrappers.h"
-#include <bitset>
-namespace {
-static const char* shuffle_xor_source =
-    "__kernel void test_sub_group_shuffle_xor(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    out[gid] = sub_group_shuffle_xor(x, xy[gid].z);"
-    "}\n";
-static const char* shuffle_source =
-    "__kernel void test_sub_group_shuffle(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    out[gid] = sub_group_shuffle(x, xy[gid].z);"
-    "}\n";
-template <typename T> int run_shuffle_for_type(RunTestForType rft)
-    int error = rft.run_impl<T, SHF<T, ShuffleOp::shuffle>>(
-        "test_sub_group_shuffle", shuffle_source);
-    error |= rft.run_impl<T, SHF<T, ShuffleOp::shuffle_xor>>(
-        "test_sub_group_shuffle_xor", shuffle_xor_source);
-    return error;
-int test_subgroup_functions_shuffle(cl_device_id device, cl_context context,
-                                    cl_command_queue queue, int num_elements)
-    std::vector<std::string> required_extensions{ "cl_khr_subgroup_shuffle" };
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size,
-                                required_extensions);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-    int error = run_shuffle_for_type<cl_int>(rft);
-    error |= run_shuffle_for_type<cl_uint>(rft);
-    error |= run_shuffle_for_type<cl_long>(rft);
-    error |= run_shuffle_for_type<cl_ulong>(rft);
-    error |= run_shuffle_for_type<cl_short>(rft);
-    error |= run_shuffle_for_type<cl_ushort>(rft);
-    error |= run_shuffle_for_type<cl_char>(rft);
-    error |= run_shuffle_for_type<cl_uchar>(rft);
-    error |= run_shuffle_for_type<cl_float>(rft);
-    error |= run_shuffle_for_type<cl_double>(rft);
-    error |= run_shuffle_for_type<subgroups::cl_half>(rft);
-    return error;
diff --git a/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp b/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp
deleted file mode 100644
index 6000c97..0000000
--- a/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright (c) 2021 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "procs.h"
-#include "subhelpers.h"
-#include "subgroup_common_templates.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-namespace {
-static const char* shuffle_down_source =
-    "__kernel void test_sub_group_shuffle_down(const __global Type *in, "
-    "__global int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    out[gid] = sub_group_shuffle_down(x, xy[gid].z);"
-    "}\n";
-static const char* shuffle_up_source =
-    "__kernel void test_sub_group_shuffle_up(const __global Type *in, __global "
-    "int4 *xy, __global Type *out)\n"
-    "{\n"
-    "    int gid = get_global_id(0);\n"
-    "    XY(xy,gid);\n"
-    "    Type x = in[gid];\n"
-    "    out[gid] = sub_group_shuffle_up(x, xy[gid].z);"
-    "}\n";
-template <typename T> int run_shuffle_relative_for_type(RunTestForType rft)
-    int error = rft.run_impl<T, SHF<T, ShuffleOp::shuffle_up>>(
-        "test_sub_group_shuffle_up", shuffle_up_source);
-    error |= rft.run_impl<T, SHF<T, ShuffleOp::shuffle_down>>(
-        "test_sub_group_shuffle_down", shuffle_down_source);
-    return error;
-int test_subgroup_functions_shuffle_relative(cl_device_id device,
-                                             cl_context context,
-                                             cl_command_queue queue,
-                                             int num_elements)
-    std::vector<std::string> required_extensions = {
-        "cl_khr_subgroup_shuffle_relative"
-    };
-    constexpr size_t global_work_size = 2000;
-    constexpr size_t local_work_size = 200;
-    WorkGroupParams test_params(global_work_size, local_work_size,
-                                required_extensions);
-    RunTestForType rft(device, context, queue, num_elements, test_params);
-    int error = run_shuffle_relative_for_type<cl_int>(rft);
-    error |= run_shuffle_relative_for_type<cl_uint>(rft);
-    error |= run_shuffle_relative_for_type<cl_long>(rft);
-    error |= run_shuffle_relative_for_type<cl_ulong>(rft);
-    error |= run_shuffle_relative_for_type<cl_short>(rft);
-    error |= run_shuffle_relative_for_type<cl_ushort>(rft);
-    error |= run_shuffle_relative_for_type<cl_char>(rft);
-    error |= run_shuffle_relative_for_type<cl_uchar>(rft);
-    error |= run_shuffle_relative_for_type<cl_float>(rft);
-    error |= run_shuffle_relative_for_type<cl_double>(rft);
-    error |= run_shuffle_relative_for_type<subgroups::cl_half>(rft);
-    return error;
diff --git a/test_conformance/subgroups/test_workgroup.cpp b/test_conformance/subgroups/test_workgroup.cpp
new file mode 100644
index 0000000..bc9d619
--- /dev/null
+++ b/test_conformance/subgroups/test_workgroup.cpp
@@ -0,0 +1,875 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "procs.h"
+#include "subhelpers.h"
+#include "harness/conversions.h"
+#include "harness/typeWrappers.h"
+static const char *any_source = "__kernel void test_any(const __global Type "
+                                "*in, __global int2 *xy, __global Type *out)\n"
+                                "{\n"
+                                "    int gid = get_global_id(0);\n"
+                                "    XY(xy,gid);\n"
+                                "    out[gid] = sub_group_any(in[gid]);\n"
+                                "}\n";
+static const char *all_source = "__kernel void test_all(const __global Type "
+                                "*in, __global int2 *xy, __global Type *out)\n"
+                                "{\n"
+                                "    int gid = get_global_id(0);\n"
+                                "    XY(xy,gid);\n"
+                                "    out[gid] = sub_group_all(in[gid]);\n"
+                                "}\n";
+static const char *bcast_source =
+    "__kernel void test_bcast(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    Type x = in[gid];\n"
+    "    size_t loid = (size_t)((int)x % 100);\n"
+    "    out[gid] = sub_group_broadcast(x, loid);\n"
+    "}\n";
+static const char *redadd_source =
+    "__kernel void test_redadd(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_reduce_add(in[gid]);\n"
+    "}\n";
+static const char *redmax_source =
+    "__kernel void test_redmax(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_reduce_max(in[gid]);\n"
+    "}\n";
+static const char *redmin_source =
+    "__kernel void test_redmin(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_reduce_min(in[gid]);\n"
+    "}\n";
+static const char *scinadd_source =
+    "__kernel void test_scinadd(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_scan_inclusive_add(in[gid]);\n"
+    "}\n";
+static const char *scinmax_source =
+    "__kernel void test_scinmax(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_scan_inclusive_max(in[gid]);\n"
+    "}\n";
+static const char *scinmin_source =
+    "__kernel void test_scinmin(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_scan_inclusive_min(in[gid]);\n"
+    "}\n";
+static const char *scexadd_source =
+    "__kernel void test_scexadd(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_scan_exclusive_add(in[gid]);\n"
+    "}\n";
+static const char *scexmax_source =
+    "__kernel void test_scexmax(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_scan_exclusive_max(in[gid]);\n"
+    "}\n";
+static const char *scexmin_source =
+    "__kernel void test_scexmin(const __global Type *in, __global int2 *xy, "
+    "__global Type *out)\n"
+    "{\n"
+    "    int gid = get_global_id(0);\n"
+    "    XY(xy,gid);\n"
+    "    out[gid] = sub_group_scan_exclusive_min(in[gid]);\n"
+    "}\n";
+// Any/All test functions
+template <int Which> struct AA
+    static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng)
+    {
+        int i, ii, j, k, n;
+        int nj = (nw + ns - 1) / ns;
+        int e;
+        ii = 0;
+        for (k = 0; k < ng; ++k)
+        {
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns;
+                n = ii + ns > nw ? nw - ii : ns;
+                e = (int)(genrand_int32(gMTdata) % 3);
+                // Initialize data matrix indexed by local id and sub group id
+                switch (e)
+                {
+                    case 0: memset(&t[ii], 0, n * sizeof(cl_int)); break;
+                    case 1:
+                        memset(&t[ii], 0, n * sizeof(cl_int));
+                        i = (int)(genrand_int32(gMTdata) % (cl_uint)n);
+                        t[ii + i] = 41;
+                        break;
+                    case 2: memset(&t[ii], 0xff, n * sizeof(cl_int)); break;
+                }
+            }
+            // Now map into work group using map from device
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                x[j] = t[i];
+            }
+            x += nw;
+            m += 2 * nw;
+        }
+    }
+    static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m,
+                   int ns, int nw, int ng)
+    {
+        int ii, i, j, k, n;
+        int nj = (nw + ns - 1) / ns;
+        cl_int taa, raa;
+        log_info("  sub_group_%s...\n", Which == 0 ? "any" : "all");
+        for (k = 0; k < ng; ++k)
+        {
+            // Map to array indexed by local ID and sub group
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                mx[i] = x[j];
+                my[i] = y[j];
+            }
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns;
+                n = ii + ns > nw ? nw - ii : ns;
+                // Compute target
+                if (Which == 0)
+                {
+                    taa = 0;
+                    for (i = 0; i < n; ++i) taa |= mx[ii + i] != 0;
+                }
+                else
+                {
+                    taa = 1;
+                    for (i = 0; i < n; ++i) taa &= mx[ii + i] != 0;
+                }
+                // Check result
+                for (i = 0; i < n; ++i)
+                {
+                    raa = my[ii + i] != 0;
+                    if (raa != taa)
+                    {
+                        log_error("ERROR: sub_group_%s mismatch for local id "
+                                  "%d in sub group %d in group %d\n",
+                                  Which == 0 ? "any" : "all", i, j, k);
+                        return -1;
+                    }
+                }
+            }
+            x += nw;
+            y += nw;
+            m += 2 * nw;
+        }
+        return 0;
+    }
+// Reduce functions
+template <typename Ty, int Which> struct RED
+    static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
+    {
+        int i, ii, j, k, n;
+        int nj = (nw + ns - 1) / ns;
+        ii = 0;
+        for (k = 0; k < ng; ++k)
+        {
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns;
+                n = ii + ns > nw ? nw - ii : ns;
+                for (i = 0; i < n; ++i)
+                    t[ii + i] = (Ty)(
+                        (int)(genrand_int32(gMTdata) & 0x7fffffff) % ns + 1);
+            }
+            // Now map into work group using map from device
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                x[j] = t[i];
+            }
+            x += nw;
+            m += 2 * nw;
+        }
+    }
+    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw,
+                   int ng)
+    {
+        int ii, i, j, k, n;
+        int nj = (nw + ns - 1) / ns;
+        Ty tr, rr;
+        log_info("  sub_group_reduce_%s(%s)...\n",
+                 Which == 0 ? "add" : (Which == 1 ? "max" : "min"),
+                 TypeName<Ty>::val());
+        for (k = 0; k < ng; ++k)
+        {
+            // Map to array indexed by local ID and sub group
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                mx[i] = x[j];
+                my[i] = y[j];
+            }
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns;
+                n = ii + ns > nw ? nw - ii : ns;
+                // Compute target
+                if (Which == 0)
+                {
+                    // add
+                    tr = mx[ii];
+                    for (i = 1; i < n; ++i) tr += mx[ii + i];
+                }
+                else if (Which == 1)
+                {
+                    // max
+                    tr = mx[ii];
+                    for (i = 1; i < n; ++i)
+                        tr = tr > mx[ii + i] ? tr : mx[ii + i];
+                }
+                else if (Which == 2)
+                {
+                    // min
+                    tr = mx[ii];
+                    for (i = 1; i < n; ++i)
+                        tr = tr > mx[ii + i] ? mx[ii + i] : tr;
+                }
+                // Check result
+                for (i = 0; i < n; ++i)
+                {
+                    rr = my[ii + i];
+                    if (rr != tr)
+                    {
+                        log_error("ERROR: sub_group_reduce_%s(%s) mismatch for "
+                                  "local id %d in sub group %d in group %d\n",
+                                  Which == 0 ? "add"
+                                             : (Which == 1 ? "max" : "min"),
+                                  TypeName<Ty>::val(), i, j, k);
+                        return -1;
+                    }
+                }
+            }
+            x += nw;
+            y += nw;
+            m += 2 * nw;
+        }
+        return 0;
+    }
+// Scan Inclusive functions
+template <typename Ty, int Which> struct SCIN
+    static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
+    {
+        int i, ii, j, k, n;
+        int nj = (nw + ns - 1) / ns;
+        ii = 0;
+        for (k = 0; k < ng; ++k)
+        {
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns;
+                n = ii + ns > nw ? nw - ii : ns;
+                for (i = 0; i < n; ++i)
+                    // t[ii+i] = (Ty)((int)(genrand_int32(gMTdata) & 0x7fffffff)
+                    // % ns + 1);
+                    t[ii + i] = (Ty)i;
+            }
+            // Now map into work group using map from device
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                x[j] = t[i];
+            }
+            x += nw;
+            m += 2 * nw;
+        }
+    }
+    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw,
+                   int ng)
+    {
+        int ii, i, j, k, n;
+        int nj = (nw + ns - 1) / ns;
+        Ty tr, rr;
+        log_info("  sub_group_scan_inclusive_%s(%s)...\n",
+                 Which == 0 ? "add" : (Which == 1 ? "max" : "min"),
+                 TypeName<Ty>::val());
+        for (k = 0; k < ng; ++k)
+        {
+            // Map to array indexed by local ID and sub group
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                mx[i] = x[j];
+                my[i] = y[j];
+            }
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns;
+                n = ii + ns > nw ? nw - ii : ns;
+                // Check result
+                for (i = 0; i < n; ++i)
+                {
+                    if (Which == 0)
+                    {
+                        tr = i == 0 ? mx[ii] : tr + mx[ii + i];
+                    }
+                    else if (Which == 1)
+                    {
+                        tr = i == 0 ? mx[ii]
+                                    : (tr > mx[ii + i] ? tr : mx[ii + i]);
+                    }
+                    else
+                    {
+                        tr = i == 0 ? mx[ii]
+                                    : (tr > mx[ii + i] ? mx[ii + i] : tr);
+                    }
+                    rr = my[ii + i];
+                    if (rr != tr)
+                    {
+                        log_error(
+                            "ERROR: sub_group_scan_inclusive_%s(%s) mismatch "
+                            "for local id %d in sub group %d in group %d\n",
+                            Which == 0 ? "add" : (Which == 1 ? "max" : "min"),
+                            TypeName<Ty>::val(), i, j, k);
+                        return -1;
+                    }
+                }
+            }
+            x += nw;
+            y += nw;
+            m += 2 * nw;
+        }
+        return 0;
+    }
+// Scan Exclusive functions
+template <typename Ty, int Which> struct SCEX
+    static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
+    {
+        int i, ii, j, k, n;
+        int nj = (nw + ns - 1) / ns;
+        ii = 0;
+        for (k = 0; k < ng; ++k)
+        {
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns;
+                n = ii + ns > nw ? nw - ii : ns;
+                for (i = 0; i < n; ++i)
+                    t[ii + i] = (Ty)(
+                        (int)(genrand_int32(gMTdata) & 0x7fffffff) % ns + 1);
+            }
+            // Now map into work group using map from device
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                x[j] = t[i];
+            }
+            x += nw;
+            m += 2 * nw;
+        }
+    }
+    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw,
+                   int ng)
+    {
+        int ii, i, j, k, n;
+        int nj = (nw + ns - 1) / ns;
+        Ty tr, trt, rr;
+        log_info("  sub_group_scan_exclusive_%s(%s)...\n",
+                 Which == 0 ? "add" : (Which == 1 ? "max" : "min"),
+                 TypeName<Ty>::val());
+        for (k = 0; k < ng; ++k)
+        {
+            // Map to array indexed by local ID and sub group
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                mx[i] = x[j];
+                my[i] = y[j];
+            }
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns;
+                n = ii + ns > nw ? nw - ii : ns;
+                // Check result
+                for (i = 0; i < n; ++i)
+                {
+                    if (Which == 0)
+                    {
+                        tr = i == 0 ? TypeIdentity<Ty, Which>::val() : tr + trt;
+                    }
+                    else if (Which == 1)
+                    {
+                        tr = i == 0 ? TypeIdentity<Ty, Which>::val()
+                                    : (trt > tr ? trt : tr);
+                    }
+                    else
+                    {
+                        tr = i == 0 ? TypeIdentity<Ty, Which>::val()
+                                    : (trt > tr ? tr : trt);
+                    }
+                    trt = mx[ii + i];
+                    rr = my[ii + i];
+                    if (rr != tr)
+                    {
+                        log_error(
+                            "ERROR: sub_group_scan_exclusive_%s(%s) mismatch "
+                            "for local id %d in sub group %d in group %d\n",
+                            Which == 0 ? "add" : (Which == 1 ? "max" : "min"),
+                            TypeName<Ty>::val(), i, j, k);
+                        return -1;
+                    }
+                }
+            }
+            x += nw;
+            y += nw;
+            m += 2 * nw;
+        }
+        return 0;
+    }
+// Broadcast functions
+template <typename Ty> struct BC
+    static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
+    {
+        int i, ii, j, k, l, n;
+        int nj = (nw + ns - 1) / ns;
+        int d = ns > 100 ? 100 : ns;
+        ii = 0;
+        for (k = 0; k < ng; ++k)
+        {
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns;
+                n = ii + ns > nw ? nw - ii : ns;
+                l = (int)(genrand_int32(gMTdata) & 0x7fffffff)
+                    % (d > n ? n : d);
+                for (i = 0; i < n; ++i)
+                    t[ii + i] = (Ty)((int)(genrand_int32(gMTdata) & 0x7fffffff)
+                                         % 100 * 100
+                                     + l);
+            }
+            // Now map into work group using map from device
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                x[j] = t[i];
+            }
+            x += nw;
+            m += 2 * nw;
+        }
+    }
+    static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, int ns, int nw,
+                   int ng)
+    {
+        int ii, i, j, k, l, n;
+        int nj = (nw + ns - 1) / ns;
+        Ty tr, rr;
+        log_info("  sub_group_broadcast(%s)...\n", TypeName<Ty>::val());
+        for (k = 0; k < ng; ++k)
+        {
+            // Map to array indexed by local ID and sub group
+            for (j = 0; j < nw; ++j)
+            {
+                i = m[2 * j + 1] * ns + m[2 * j];
+                mx[i] = x[j];
+                my[i] = y[j];
+            }
+            for (j = 0; j < nj; ++j)
+            {
+                ii = j * ns;
+                n = ii + ns > nw ? nw - ii : ns;
+                l = (int)mx[ii] % 100;
+                tr = mx[ii + l];
+                // Check result
+                for (i = 0; i < n; ++i)
+                {
+                    rr = my[ii + i];
+                    if (rr != tr)
+                    {
+                        log_error("ERROR: sub_group_broadcast(%s) mismatch for "
+                                  "local id %d in sub group %d in group %d\n",
+                                  TypeName<Ty>::val(), i, j, k);
+                        return -1;
+                    }
+                }
+            }
+            x += nw;
+            y += nw;
+            m += 2 * nw;
+        }
+        return 0;
+    }
+// Entry point from main
+int test_work_group_functions(cl_device_id device, cl_context context,
+                              cl_command_queue queue, int num_elements,
+                              bool useCoreSubgroups)
+    int error;
+    // Adjust these individually below if desired/needed
+#define G 2000
+#define L 200
+    error = test<int, AA<0>, G, L>::run(device, context, queue, num_elements,
+                                        "test_any", any_source, 0,
+                                        useCoreSubgroups);
+    error |= test<int, AA<1>, G, L>::run(device, context, queue, num_elements,
+                                         "test_all", all_source, 0,
+                                         useCoreSubgroups);
+    // error |= test<cl_half, BC<cl_half>, G, L>::run(device, context, queue,
+    // num_elements, "test_bcast", bcast_source);
+    error |= test<cl_uint, BC<cl_uint>, G, L>::run(
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, BC<cl_int>, G, L>::run(
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, BC<cl_ulong>, G, L>::run(
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, BC<cl_long>, G, L>::run(
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
+    error |= test<float, BC<float>, G, L>::run(
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
+    error |= test<double, BC<double>, G, L>::run(
+        device, context, queue, num_elements, "test_bcast", bcast_source, 0,
+        useCoreSubgroups);
+    // error |= test<cl_half, RED<cl_half,0>, G, L>::run(device, context, queue,
+    // num_elements, "test_redadd", redadd_source);
+    error |= test<cl_uint, RED<cl_uint, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, RED<cl_int, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, RED<cl_ulong, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, RED<cl_long, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
+    error |= test<float, RED<float, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
+    error |= test<double, RED<double, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_redadd", redadd_source, 0,
+        useCoreSubgroups);
+    // error |= test<cl_half, RED<cl_half,1>, G, L>::run(device, context, queue,
+    // num_elements, "test_redmax", redmax_source);
+    error |= test<cl_uint, RED<cl_uint, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, RED<cl_int, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, RED<cl_ulong, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, RED<cl_long, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
+    error |= test<float, RED<float, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
+    error |= test<double, RED<double, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_redmax", redmax_source, 0,
+        useCoreSubgroups);
+    // error |= test<cl_half, RED<cl_half,2>, G, L>::run(device, context, queue,
+    // num_elements, "test_redmin", redmin_source);
+    error |= test<cl_uint, RED<cl_uint, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, RED<cl_int, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, RED<cl_ulong, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, RED<cl_long, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
+    error |= test<float, RED<float, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
+    error |= test<double, RED<double, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_redmin", redmin_source, 0,
+        useCoreSubgroups);
+    // error |= test<cl_half, SCIN<cl_half,0>, G, L>::run(device, context,
+    // queue, num_elements, "test_scinadd", scinadd_source);
+    error |= test<cl_uint, SCIN<cl_uint, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, SCIN<cl_int, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, SCIN<cl_ulong, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, SCIN<cl_long, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
+    error |= test<float, SCIN<float, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
+    error |= test<double, SCIN<double, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scinadd", scinadd_source, 0,
+        useCoreSubgroups);
+    // error |= test<cl_half, SCIN<cl_half,1>, G, L>::run(device, context,
+    // queue, num_elements, "test_scinmax", scinmax_source);
+    error |= test<cl_uint, SCIN<cl_uint, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, SCIN<cl_int, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, SCIN<cl_ulong, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, SCIN<cl_long, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
+    error |= test<float, SCIN<float, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
+    error |= test<double, SCIN<double, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmax", scinmax_source, 0,
+        useCoreSubgroups);
+    // error |= test<cl_half, SCIN<cl_half,2>, G, L>::run(device, context,
+    // queue, num_elements, "test_scinmin", scinmin_source);
+    error |= test<cl_uint, SCIN<cl_uint, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, SCIN<cl_int, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, SCIN<cl_ulong, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, SCIN<cl_long, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
+    error |= test<float, SCIN<float, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
+    error |= test<double, SCIN<double, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scinmin", scinmin_source, 0,
+        useCoreSubgroups);
+    // error |= test<cl_half, SCEX<cl_half,0>, G, L>::run(device, context,
+    // queue, num_elements, "test_scexadd", scexadd_source);
+    error |= test<cl_uint, SCEX<cl_uint, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, SCEX<cl_int, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, SCEX<cl_ulong, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, SCEX<cl_long, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
+    error |= test<float, SCEX<float, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
+    error |= test<double, SCEX<double, 0>, G, L>::run(
+        device, context, queue, num_elements, "test_scexadd", scexadd_source, 0,
+        useCoreSubgroups);
+    // error |= test<cl_half, SCEX<cl_half,1>, G, L>::run(device, context,
+    // queue, num_elements, "test_scexmax", scexmax_source);
+    error |= test<cl_uint, SCEX<cl_uint, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, SCEX<cl_int, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, SCEX<cl_ulong, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, SCEX<cl_long, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
+    error |= test<float, SCEX<float, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
+    error |= test<double, SCEX<double, 1>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmax", scexmax_source, 0,
+        useCoreSubgroups);
+    // error |= test<cl_half, SCEX<cl_half,2>, G, L>::run(device, context,
+    // queue, num_elements, "test_scexmin", scexmin_source);
+    error |= test<cl_uint, SCEX<cl_uint, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_int, SCEX<cl_int, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_ulong, SCEX<cl_ulong, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
+    error |= test<cl_long, SCEX<cl_long, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
+    error |= test<float, SCEX<float, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
+    error |= test<double, SCEX<double, 2>, G, L>::run(
+        device, context, queue, num_elements, "test_scexmin", scexmin_source, 0,
+        useCoreSubgroups);
+    return error;
+int test_work_group_functions_core(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int num_elements)
+    return test_work_group_functions(device, context, queue, num_elements,
+                                     true);
+int test_work_group_functions_ext(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int num_elements)
+    bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
+    if (!hasExtension)
+    {
+        log_info(
+            "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+    return test_work_group_functions(device, context, queue, num_elements,
+                                     false);
diff --git a/test_conformance/subgroups/test_workitem.cpp b/test_conformance/subgroups/test_workitem.cpp
index 7ffa6a7..b77bfe1 100644
--- a/test_conformance/subgroups/test_workitem.cpp
+++ b/test_conformance/subgroups/test_workitem.cpp
@@ -227,8 +227,9 @@
     const std::string &kernel_str = kernel_sstr.str();
     const char *kernel_src = kernel_str.c_str();
-    error = create_single_kernel_helper(context, &program, &kernel, 1,
-                                        &kernel_src, "get_test");
+    error = create_single_kernel_helper_with_build_options(
+        context, &program, &kernel, 1, &kernel_src, "get_test",
+        "-cl-std=CL2.0");
     if (error != 0) return error;
     error = get_max_allowed_work_group_size(context, kernel, &local, NULL);
diff --git a/test_conformance/submission_details_template.txt b/test_conformance/submission_details_template.txt
index 9d276a6..20554c8 100644
--- a/test_conformance/submission_details_template.txt
+++ b/test_conformance/submission_details_template.txt
@@ -1,5 +1,5 @@
-# OpenCL submission details template
+# OpenCL 2.0 submission details template
 # $Id $
 # $URL $
@@ -13,7 +13,8 @@
-# Submission information that needs to be copied onto the web submission form
+# Submission information (mostly section 7 and F.5 of Conformance Process 
+# Document) that needs to be copied onto the web submission form.
@@ -26,7 +27,7 @@
 # Version of OpenCL specification being tested
-OpenCL Version: Major.Minor
+OpenCL Version: 2.0
 # Statement of conformance listing each conformant product (at a specific
 # version) that is covered by this implementation. List each conformant product
@@ -73,18 +74,19 @@
-# Further submission information
+# Further submission information (mostly section 7 of Conformance Process
+# Document).
-# git tag of the tests used from GitHub (e.g. vYYYY-MM-DD-XX)
+# Date of tests used, 8 digit string as given in filename.
-Tests version:
+Tests date:
-# Implementations that support cl_khr_icd are required to use a loader to run
-# the tests and document the loader that was used.
-Loader used:
+# SHA-1 git identifier of the tests used from GitLab (e.g. 0a7770f98664a092c70d0a7d9a48d229b5fd8039)
+Test ID:
 # Date of "Khronos Conformance Process" that this submission
 # adheres to (as shown in the change history at the start of the document).
@@ -97,9 +99,47 @@
 OpenCL Conformance Process Attachment date:
-# The conformance process document makes allowances for skipping specific tests
-# in some situations. A list of tests that were skipped in accordance to these
-# rules along with the justification must be documented here.
+# List of Khronos Bugzilla bugs filed for test bugs that have been fixed
+# by modifying the tests used in this submission. Separate bug numbers with
+# commas; if none, specify "none". It is OK to use an existing bug describing
+# the same problem. In any case, after filing this conformance submission,
+# add a comment to the bug referencing the submission tracking number
+# giving justification for the test change.
-List of skipped tests in accordance with conformance process:
+Test Bugs:
+# Tested device configuration
+# Max compute units (CL_DEVICE_MAX_COMPUTE_UNITS)
+# Max clock frequency (CL_DEVICE_MAX_CLOCK_FREQUENCY)
+# Max memory allocation size (CL_DEVICE_MAX_MEM_ALLOC_SIZE)
+# Image support (CL_DEVICE_IMAGE_SUPPORT), 1/0
+# Global memory size (CL_DEVICE_GLOBAL_MEM_SIZE)
+# Max constant buffer size (CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE)
+# Local memory size (CL_DEVICE_LOCAL_MEM_SIZE)
+# Extension Queries
+# Show the list of supported extensions per device tested, each extension string on a separate line.
diff --git a/test_conformance/thread_dimensions/main.cpp b/test_conformance/thread_dimensions/main.cpp
index 9a1ce60..bddbc0f 100644
--- a/test_conformance/thread_dimensions/main.cpp
+++ b/test_conformance/thread_dimensions/main.cpp
@@ -38,6 +38,6 @@
 int main(int argc, const char *argv[])
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 );
diff --git a/test_conformance/thread_dimensions/test_thread_dimensions.cpp b/test_conformance/thread_dimensions/test_thread_dimensions.cpp
index c8d22c6..84f5708 100644
--- a/test_conformance/thread_dimensions/test_thread_dimensions.cpp
+++ b/test_conformance/thread_dimensions/test_thread_dimensions.cpp
@@ -501,8 +501,7 @@
     log_info("Memory allocation size to use is %gMB, max workgroup size is %d.\n", max_memory_size/(1024.0*1024.0), (int)max_workgroup_size);
     while (!found_size && memory_size >= max_memory_size/8) {
-        array =
-            clCreateBuffer(context, CL_MEM_READ_WRITE, memory_size, NULL, &err);
+        array = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), memory_size, NULL, &err);
             memory_size -= max_memory_size/16;
diff --git a/test_conformance/vec_align/CMakeLists.txt b/test_conformance/vec_align/CMakeLists.txt
new file mode 100644
index 0000000..f1a42e6
--- /dev/null
+++ b/test_conformance/vec_align/CMakeLists.txt
@@ -0,0 +1,11 @@
+    globals.cpp
+        main.cpp
+        structs.cpp
+        test_vec_align.cpp
+        type_replacer.cpp
diff --git a/test_conformance/vec_align/defines.h b/test_conformance/vec_align/defines.h
new file mode 100644
index 0000000..f2bf9e7
--- /dev/null
+++ b/test_conformance/vec_align/defines.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "harness/errorHelpers.h"
+#include "harness/kernelHelpers.h"
+#include "harness/threadTesting.h"
+#include "harness/typeWrappers.h"
+#include "harness/conversions.h"
+#include "harness/mt19937.h"
+// Vector-width tables, each with NUM_VECTOR_SIZES entries. globals.cpp stores
+// the widths in the order 1,2,3,4,8,16 (not the alternative 1,2,4,8,16,3).
+extern int g_arrVecSizes[NUM_VECTOR_SIZES]; // number of elements in each vector width
+extern int g_arrVecSteps[NUM_VECTOR_SIZES]; // same as the width except that the 3-wide entry is 4
+extern bool g_wimpyMode; // when true, initContents() fills 4-byte data with alternating-sign values
+extern const char * g_arrVecSizeNames[NUM_VECTOR_SIZES]; // width as a type-name suffix ("" for scalar)
+extern size_t g_arrVecAlignMasks[NUM_VECTOR_SIZES]; // (aligned width - 1) for each entry
+// Define the buffer size that we want to block our test with
+#define BUFFER_SIZE (1024*1024)
+#define KPAGESIZE 4096
+extern ExplicitType types[]; // element types under test; the list ends with kNumExplicitTypes
+extern const char *g_arrTypeNames[]; // OpenCL C type names, used with the same index as types[]
+extern const size_t g_arrTypeSizes[]; // element sizes, used with the same index as types[]
diff --git a/test_conformance/vec_align/globals.cpp b/test_conformance/vec_align/globals.cpp
new file mode 100644
index 0000000..3deecad
--- /dev/null
+++ b/test_conformance/vec_align/globals.cpp
@@ -0,0 +1,59 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "defines.h"
+// Vector-width tables. The ordering used here is 1,2,3,4,8,16
+// (the alternative ordering 1,2,4,8,16,3 is not the one stored below).
+int g_arrVecSizes[NUM_VECTOR_SIZES] = {1,2,3,4,8,16}; // elements per vector
+int g_arrVecSteps[NUM_VECTOR_SIZES] = {1,2,4,4,8,16}; // the 3-wide entry is 4, the same value get_align() returns for 3
+const char * g_arrVecSizeNames[NUM_VECTOR_SIZES] = {"", "2","3","4","8","16"}; // suffix substituted for ".NUM." in kernel sources
+size_t g_arrVecAlignMasks[NUM_VECTOR_SIZES] = {(size_t)0, // 1: every entry is (aligned width - 1)
+                           (size_t)0x1, // 2
+                           (size_t)0x3, // 3 (same mask as the 4-wide entry)
+                           (size_t)0x3, // 4
+                           (size_t)0x7, // 8
+                           (size_t)0xf // 16
+bool g_wimpyMode = false; // off by default
+ExplicitType types[] = { kChar, kUChar, // element types iterated by test_vec_internal()
+             kShort, kUShort,
+             kInt, kUInt,
+             kLong, kULong,
+             kFloat, kDouble,
+             kNumExplicitTypes }; // sentinel: the type loop stops when it reaches this value
+const char *g_arrTypeNames[] = // substituted for ".TYPE." in kernel sources; same order as types[]
+    {
+    "char",  "uchar",
+    "short", "ushort",
+    "int",   "uint",
+    "long",  "ulong",
+    "float", "double"
+    };
+extern const size_t g_arrTypeSizes[] = // element sizes, same order as types[] (presumably bytes - confirm)
+    {
+    1, 1,
+    2, 2,
+    4, 4,
+    8, 8,
+    4, 8
+    };
diff --git a/test_conformance/vec_align/main.cpp b/test_conformance/vec_align/main.cpp
new file mode 100644
index 0000000..6894895
--- /dev/null
+++ b/test_conformance/vec_align/main.cpp
@@ -0,0 +1,41 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "harness/compat.h"
+#include <stdio.h>
+#include <string.h>
+#include "procs.h"
+#include "harness/testHarness.h"
+#if !defined(_WIN32)
+#include <unistd.h>
+test_definition test_list[] = { // tests handed to the harness by main()
+    ADD_TEST( vec_align_array ), // presumably binds to test_vec_align_array from procs.h - confirm against the ADD_TEST macro
+    ADD_TEST( vec_align_struct ),
+    ADD_TEST( vec_align_packed_struct ),
+    ADD_TEST( vec_align_struct_arr ),
+    ADD_TEST( vec_align_packed_struct_arr ),
+const int test_num = ARRAY_SIZE( test_list ); // entry count derived from the table itself
+int main(int argc, const char *argv[])
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 ); // the process exit code is whatever the harness returns
diff --git a/test_conformance/vec_align/procs.h b/test_conformance/vec_align/procs.h
new file mode 100644
index 0000000..7b60e1e
--- /dev/null
+++ b/test_conformance/vec_align/procs.h
@@ -0,0 +1,39 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "harness/errorHelpers.h"
+#include "harness/kernelHelpers.h"
+#include "harness/threadTesting.h"
+#include "harness/typeWrappers.h"
+#include "harness/conversions.h"
+#include "harness/mt19937.h"
+// The number of errors to print out for each test in the shuffle tests
+extern int      create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret); // NOTE(review): no definition is visible in the vec_align sources reviewed here - confirm it is still needed
+int test_vec_align_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); // listed in main.cpp's test_list as vec_align_array
+int test_vec_align_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); // listed as vec_align_struct
+int test_vec_align_packed_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); // listed as vec_align_packed_struct
+int test_vec_align_struct_arr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); // listed as vec_align_struct_arr
+int test_vec_align_packed_struct_arr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); // listed as vec_align_packed_struct_arr
diff --git a/test_conformance/vec_align/structs.cpp b/test_conformance/vec_align/structs.cpp
new file mode 100644
index 0000000..2e15e36
--- /dev/null
+++ b/test_conformance/vec_align/structs.cpp
@@ -0,0 +1,375 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "structs.h"
+#include "defines.h"
+#define DEBUG_MEM_ALLOC 0
+/** typedef struct _bufferStruct
+ {
+ void * m_pIn;
+ void * m_pOut;
+ cl_mem m_outBuffer;
+ cl_mem m_inBuffer;
+ size_t m_bufSize;
+ } bufferStruct;
+ */
+// Allocates a clState and records the device, context and queue it will use.
+// The program and kernel handles start out NULL and the thread count at 0.
+// Returns NULL (after logging) if the allocation fails. The device, context
+// and queue are only stored, never retained or released.
+clState * newClState(cl_device_id device, cl_context context, cl_command_queue queue)
+{
+    clState * pResult = (clState *)malloc(sizeof(clState));
+    if (pResult == NULL)
+    {
+        log_error("malloc of clState failed\n");
+        return NULL;
+    }
+    // Pointers must be printed with %p; passing one to %x is undefined behavior.
+    log_info("malloc clState * %p\n", (void *)pResult);
+    pResult->m_device = device;
+    pResult->m_context = context;
+    pResult->m_queue = queue;
+    pResult->m_kernel = NULL;
+    pResult->m_program = NULL;
+    pResult->m_numThreads = 0; // keep every field defined until runKernel() sets it
+    return pResult;
+}
+// Releases the program and kernel held by pState (if any) and frees pState.
+// Accepts NULL, in which case nothing happens. Always returns NULL so callers
+// can write `p = destroyClState(p);`.
+clState * destroyClState(clState * pState)
+{
+    if (pState == NULL)
+    {
+        return NULL;
+    }
+    clStateDestroyProgramAndKernel(pState);
+    // Pointers must be printed with %p; passing one to %x is undefined behavior.
+    log_info("delete (free) clState * %p\n", (void *)pState);
+    free(pState);
+    return NULL;
+}
+// Builds a program from the single source string `prog` and creates the
+// kernel named `kernelName`, storing both handles in pState.
+// Returns whatever create_single_kernel_helper() returns.
+int clStateMakeProgram(clState * pState, const char * prog,
+                       const char * kernelName)
+{
+    const char * sources[1] = { prog };
+    const int status = create_single_kernel_helper(pState->m_context,
+                                                   &pState->m_program,
+                                                   &pState->m_kernel,
+                                                   1, sources, kernelName );
+    log_info("create program and kernel\n");
+    return status;
+}
+// Records numThreads in pState and enqueues pState's kernel as a
+// one-dimensional range of that many work-items (no local size given).
+// Returns 0 when the enqueue succeeds, -1 (after logging) otherwise.
+int runKernel(clState * pState, size_t numThreads) {
+    pState->m_numThreads = numThreads;
+    const int status = clEnqueueNDRangeKernel(pState->m_queue,
+                                              pState->m_kernel,
+                                              1, NULL,
+                                              &pState->m_numThreads,
+                                              NULL, 0, NULL, NULL);
+    if (status == CL_SUCCESS)
+    {
+        return 0;
+    }
+    log_error("clEnqueueNDRangeKernel returned %d (%x)\n",
+              status, status);
+    return -1;
+}
+// Releases the kernel and then the program stored in pState, skipping any
+// handle that is already NULL, and resets each released handle to NULL.
+void clStateDestroyProgramAndKernel(clState * pState)
+{
+    log_info("destroy program and kernel\n");
+    if (pState->m_kernel)
+    {
+        clReleaseKernel(pState->m_kernel);
+        pState->m_kernel = NULL;
+    }
+    if (pState->m_program)
+    {
+        clReleaseProgram(pState->m_program);
+        pState->m_program = NULL;
+    }
+}
+// Allocates a bufferStruct with host staging memory and device buffers.
+//   inSize   size of the host input block and of the read-only device buffer
+//   outSize  size of the host output block and of the write-only device buffer
+//   pClState supplies the context the device buffers are created in
+// Returns the new object, or NULL if any allocation fails; on failure
+// everything acquired so far is released through destroyBufferStruct().
+bufferStruct * newBufferStruct(size_t inSize, size_t outSize, clState * pClState) {
+    int error;
+    bufferStruct * pResult = (bufferStruct *)malloc(sizeof(bufferStruct));
+    if (pResult == NULL)
+    {
+        log_error("malloc of bufferStruct failed\n");
+        return NULL;
+    }
+    // Pointers must be printed with %p; passing one to %x is undefined behavior.
+    log_info("malloc bufferStruct * %p\n", (void *)pResult);
+    // Put every field into a known state before anything can fail, so that
+    // the cleanup path only ever releases handles that were really created.
+    pResult->m_pIn = NULL;
+    pResult->m_pOut = NULL;
+    pResult->m_inBuffer = NULL;
+    pResult->m_outBuffer = NULL;
+    pResult->m_bufSizeIn = inSize;
+    pResult->m_bufSizeOut = outSize;
+    pResult->m_bufferUploaded = false;
+    pResult->m_pIn = malloc(inSize);
+    pResult->m_pOut = malloc(outSize);
+    if (pResult->m_pIn == NULL || pResult->m_pOut == NULL)
+    {
+        log_error("malloc of host buffers failed\n");
+        return destroyBufferStruct(pResult, pClState);
+    }
+    log_info("malloc m_pIn %p\n", pResult->m_pIn);
+    log_info("malloc m_pOut %p\n", pResult->m_pOut);
+    pResult->m_inBuffer = clCreateBuffer(pClState->m_context, CL_MEM_READ_ONLY,
+                                         inSize, NULL, &error);
+    if( pResult->m_inBuffer == NULL )
+    {
+        vlog_error( "clCreateBuffer failed for input (%d)\n", error );
+        return destroyBufferStruct(pResult, pClState);
+    }
+    log_info("clCreateBuffer %p\n", (void *)pResult->m_inBuffer);
+    pResult->m_outBuffer = clCreateBuffer( pClState->m_context,
+                                          CL_MEM_WRITE_ONLY,
+                                          outSize,
+                                          NULL,
+                                          &error );
+    if( pResult->m_outBuffer == NULL )
+    {
+        vlog_error( "clCreateBuffer failed for output (%d)\n", error );
+        return destroyBufferStruct(pResult, pClState);
+    }
+    log_info("clCreateBuffer %p\n", (void *)pResult->m_outBuffer);
+    return pResult;
+}
+// Releases both device buffers and both host blocks owned by destroyMe, then
+// frees destroyMe itself. Members that are NULL are skipped. Accepts NULL.
+// pClState is not used. Always returns NULL so callers can write
+// `p = destroyBufferStruct(p, state);`.
+bufferStruct * destroyBufferStruct(bufferStruct * destroyMe, clState * pClState) {
+    (void)pClState;
+    if (destroyMe == NULL)
+    {
+        return NULL;
+    }
+    // Pointers must be printed with %p; passing one to %x is undefined behavior.
+    if (destroyMe->m_outBuffer != NULL) {
+        log_info("clReleaseMemObject %p\n", (void *)destroyMe->m_outBuffer);
+        clReleaseMemObject(destroyMe->m_outBuffer);
+        destroyMe->m_outBuffer = NULL;
+    }
+    if (destroyMe->m_inBuffer != NULL) {
+        // Log the handle that is actually being released (the input buffer);
+        // m_outBuffer has already been cleared at this point.
+        log_info("clReleaseMemObject %p\n", (void *)destroyMe->m_inBuffer);
+        clReleaseMemObject(destroyMe->m_inBuffer);
+        destroyMe->m_inBuffer = NULL;
+    }
+    if (destroyMe->m_pIn != NULL) {
+        log_info("delete (free) m_pIn %p\n", destroyMe->m_pIn);
+        free(destroyMe->m_pIn);
+        destroyMe->m_pIn = NULL;
+    }
+    if (destroyMe->m_pOut != NULL) {
+        log_info("delete (free) m_pOut %p\n", destroyMe->m_pOut);
+        free(destroyMe->m_pOut);
+        destroyMe->m_pOut = NULL;
+    }
+    log_info("delete (free) bufferStruct * %p\n", (void *)destroyMe);
+    free((void *)destroyMe);
+    return NULL;
+}
+// Fills the host input block (m_pIn) with a deterministic pattern.
+//   typeSize  element width; must be 1, 2, 4 or 8, anything else is logged
+//             and the buffer is left untouched
+//   countIn   number of elements to write; must fit in m_bufSizeIn
+//   pClState, countOut  accepted but not used
+// 1- and 2-byte elements, and 4-byte elements outside wimpy mode, receive an
+// incrementing counter truncated to the element width. 8-byte elements, and
+// 4-byte elements in wimpy mode, receive the counter with alternating sign
+// (0, -1, 2, -3, ...).
+void initContents(bufferStruct * pBufferStruct, clState * pClState,
+                  size_t typeSize,
+                  size_t countIn, size_t countOut )
+{
+    size_t i;
+    uint64_t start = 0;
+    (void)pClState;
+    (void)countOut;
+    if (typeSize != 1 && typeSize != 2 && typeSize != 4 && typeSize != 8)
+    {
+        log_error("invalid type size %x\n", (int)typeSize);
+        return;
+    }
+    // Refuse to write past the end of m_pIn. Dividing instead of multiplying
+    // keeps the check itself free of overflow.
+    if (countIn > pBufferStruct->m_bufSizeIn / typeSize)
+    {
+        log_error("initContents: %zu elements of size %zu do not fit in %zu bytes\n",
+                  countIn, typeSize, pBufferStruct->m_bufSizeIn);
+        return;
+    }
+    switch(typeSize)
+    {
+        case 1: {
+            uint8_t* ub = (uint8_t *)(pBufferStruct->m_pIn);
+            for (i=0; i < countIn; ++i)
+            {
+                ub[i] = (uint8_t)start++;
+            }
+            break;
+        }
+        case 2: {
+            uint16_t* us = (uint16_t *)(pBufferStruct->m_pIn);
+            for (i=0; i < countIn; ++i)
+            {
+                us[i] = (uint16_t)start++;
+            }
+            break;
+        }
+        case 4: {
+            if (!g_wimpyMode) {
+                uint32_t* ui = (uint32_t *)(pBufferStruct->m_pIn);
+                for (i=0; i < countIn; ++i) {
+                    ui[i] = (uint32_t)start++;
+                }
+            }
+            else {
+                // The short test doesn't iterate over the entire 32 bit space so
+                // we alternate between positive and negative values
+                int32_t* ui = (int32_t *)(pBufferStruct->m_pIn);
+                int32_t sign = 1;
+                for (i=0; i < countIn; ++i, ++start) {
+                    ui[i] = (int32_t)start*sign;
+                    sign = -sign;
+                }
+            }
+            break;
+        }
+        default: { // typeSize == 8; every other value was rejected above
+            // We don't iterate over the entire space of 64 bit so for the
+            // selects, we want to test positive and negative values
+            int64_t* ll = (int64_t *)(pBufferStruct->m_pIn);
+            int64_t sign = 1;
+            for (i=0; i < countIn; ++i, ++start) {
+                ll[i] = (int64_t)start*sign;
+                sign = -sign;
+            }
+            break;
+        }
+    }
+}
+// Binds the input and output device buffers to kernel arguments 0 and 1.
+// The host input block is copied to the device (blocking write) only on the
+// first call for a given bufferStruct; m_bufferUploaded records that it ran.
+// Returns 0 on success, -1 (after logging) if the upload or either
+// clSetKernelArg call fails.
+int pushArgs(bufferStruct * pBufferStruct, clState * pClState)
+{
+    int err;
+    if( !pBufferStruct->m_bufferUploaded )
+    {
+        err = clEnqueueWriteBuffer(pClState->m_queue, pBufferStruct->m_inBuffer,
+                                   CL_TRUE, 0, pBufferStruct->m_bufSizeIn,
+                                   pBufferStruct->m_pIn, 0, NULL, NULL);
+        // The buffer handle is a pointer: print it with %p, since passing a
+        // pointer to %x is undefined behavior.
+        log_info("clEnqueueWriteBuffer %p\n", (void *)pBufferStruct->m_inBuffer);
+        if(err != CL_SUCCESS)
+        {
+            log_error("clEnqueueWriteBuffer failed\n");
+            return -1;
+        }
+        pBufferStruct->m_bufferUploaded = true;
+    }
+    // The argument size is that of the cl_mem handle, not of the data behind it.
+    err = clSetKernelArg(pClState->m_kernel, 0,
+                         sizeof(pBufferStruct->m_inBuffer),
+                         &(pBufferStruct->m_inBuffer));
+    if(err != CL_SUCCESS)
+    {
+        log_error("clSetKernelArgs failed, first arg (0)\n");
+        return -1;
+    }
+    err = clSetKernelArg(pClState->m_kernel, 1,
+                         sizeof(pBufferStruct->m_outBuffer),
+                         &(pBufferStruct->m_outBuffer));
+    if(err != CL_SUCCESS)
+    {
+        log_error("clSetKernelArgs failed, second arg (1)\n");
+        return -1;
+    }
+    return 0;
+}
+// Copies m_bufSizeOut bytes from the output device buffer into the host
+// block m_pOut with a blocking read.
+// Returns 0 on success, -1 (after logging) if the read fails.
+int retrieveResults(bufferStruct * pBufferStruct, clState * pClState)
+{
+    const int status = clEnqueueReadBuffer(pClState->m_queue,
+                                           pBufferStruct->m_outBuffer,
+                                           CL_TRUE, 0,
+                                           pBufferStruct->m_bufSizeOut,
+                                           pBufferStruct->m_pOut,
+                                           0, NULL, NULL);
+    if (status == CL_SUCCESS)
+    {
+        return 0;
+    }
+    log_error("clEnqueueReadBuffer failed\n");
+    return -1;
+}
+// Checks that each of the first m_numThreads cl_uint values in the host
+// output block is a multiple of minAlign.
+// Returns 0 if all of them are, -1 (after logging the first offender) if one
+// is not or if minAlign is 0.
+int checkCorrectness(bufferStruct * pBufferStruct, clState * pClState,
+                     size_t minAlign)
+{
+    size_t i;
+    const cl_uint * targetArr = (const cl_uint *)(pBufferStruct->m_pOut);
+    if (minAlign == 0)
+    {
+        // A zero alignment would make the modulo below divide by zero.
+        vlog_error("checkCorrectness: alignment must be non-zero\n");
+        return -1;
+    }
+    for(i = 0; i < pClState->m_numThreads; ++i)
+    {
+        if(targetArr[i] % minAlign != 0)
+        {
+            // size_t arguments need %zu / %zx; %d and %x read them as int.
+            vlog_error("Error %zu (of %zu).  Expected a multple of %zx, got %x\n",
+                       i, pClState->m_numThreads,
+                       minAlign,
+                       targetArr[i]);
+            return -1;
+        }
+    }
+    return 0;
+}
+// Checks that each of the first m_numThreads cl_uint values in the host
+// output block lies exactly beforeSize past a multiple of totSize.
+// Returns 0 if all of them do, -1 (after logging the first offender) if one
+// does not or if totSize is 0.
+int checkPackedCorrectness(bufferStruct * pBufferStruct, clState * pClState,
+                           size_t totSize, size_t beforeSize)
+{
+    size_t i;
+    const cl_uint * targetArr = (const cl_uint *)(pBufferStruct->m_pOut);
+    if (totSize == 0)
+    {
+        // A zero size would make the modulo below divide by zero.
+        vlog_error("checkPackedCorrectness: total size must be non-zero\n");
+        return -1;
+    }
+    // Compare remainders rather than computing (value - beforeSize): the
+    // unsigned subtraction wraps when value < beforeSize, and the wrapped
+    // result only has the right remainder when totSize is a power of two.
+    const size_t expectedRemainder = beforeSize % totSize;
+    for(i = 0; i < pClState->m_numThreads; ++i)
+    {
+        if(targetArr[i] % totSize != expectedRemainder)
+        {
+            // size_t arguments need %zu and cl_uint needs %u; %d misreads both.
+            vlog_error("Error %zu (of %zu).  Expected %zu more than a multple of %zu, got %u \n",
+                       i, pClState->m_numThreads, beforeSize,
+                       totSize,
+                       targetArr[i]);
+            return -1;
+        }
+    }
+    return 0;
+}
diff --git a/test_conformance/vec_align/structs.h b/test_conformance/vec_align/structs.h
new file mode 100644
index 0000000..e26b810
--- /dev/null
+++ b/test_conformance/vec_align/structs.h
@@ -0,0 +1,73 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "testBase.h"
+#include "harness/conversions.h"
+#include "harness/typeWrappers.h"
+typedef struct _clState // OpenCL objects shared by one vec_align test run
+    cl_device_id m_device; // stored by newClState(); not released by destroyClState()
+    cl_context m_context; // context used for program and buffer creation
+    cl_command_queue m_queue; // queue used for kernel launches and buffer transfers
+    cl_program m_program; // NULL until clStateMakeProgram() succeeds
+    cl_kernel m_kernel; // NULL until clStateMakeProgram() succeeds
+    size_t m_numThreads; // global work size of the most recent runKernel() call
+} clState;
+clState * newClState(cl_device_id device, cl_context context, cl_command_queue queue); // heap-allocates a clState around the given objects
+clState * destroyClState(clState * pState); // releases program/kernel, frees pState, returns NULL
+int clStateMakeProgram(clState * pState, const char * prog, // builds `prog` and creates kernel `kernelName` via create_single_kernel_helper()
+               const char * kernelName);
+void clStateDestroyProgramAndKernel(clState * pState); // releases and NULLs m_kernel and m_program
+int runKernel(clState * pState, size_t numThreads); // 1-D enqueue of m_kernel; 0 on success, -1 on failure
+typedef struct _bufferStruct // host staging blocks paired with their device buffers
+    void * m_pIn; // host input block, m_bufSizeIn bytes from malloc()
+    void * m_pOut; // host output block, m_bufSizeOut bytes from malloc()
+    cl_mem m_outBuffer; // device buffer created with CL_MEM_WRITE_ONLY
+    cl_mem m_inBuffer; // device buffer created with CL_MEM_READ_ONLY
+    size_t m_bufSizeIn, m_bufSizeOut; // sizes passed to newBufferStruct()
+    int       m_bufferUploaded; // non-zero once pushArgs() has copied m_pIn to the device
+} bufferStruct;
+bufferStruct * newBufferStruct(size_t inSize, size_t outSize, clState * pClState); // returns NULL if a device buffer cannot be created
+bufferStruct * destroyBufferStruct(bufferStruct * destroyMe, clState * pClState); // releases everything, accepts NULL, returns NULL
+void initContents(bufferStruct * pBufferStruct, clState * pClState, // NOTE(review): the definition in structs.cpp takes (typeSize, countIn, countOut), which this declaration does not match - confirm which is intended
+             size_t typeSize,
+             size_t vecWidth);
+int pushArgs(bufferStruct * pBufferStruct, clState * pClState); // uploads the input once, then sets kernel args 0 (in) and 1 (out)
+int retrieveResults(bufferStruct * pBufferStruct, clState * pClState); // blocking read of the output buffer into m_pOut
+// Result checks over the first m_numThreads cl_uint values of m_pOut.
+// Both return 0 on success and -1 at the first value that fails.
+int checkCorrectness(bufferStruct * pBufferStruct, clState * pClState, // each value must be a multiple of minAlign
+             size_t minAlign);
+int checkPackedCorrectness(bufferStruct * pBufferStruct, clState * pClState, // each value minus beforeSize must be a multiple of totSize
+               size_t totSize, size_t beforeSize);
diff --git a/test_conformance/vec_align/testBase.h b/test_conformance/vec_align/testBase.h
new file mode 100644
index 0000000..bd72e84
--- /dev/null
+++ b/test_conformance/vec_align/testBase.h
@@ -0,0 +1,28 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef _testBase_h
+#define _testBase_h
+#include "harness/compat.h"
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "procs.h"
+#endif // _testBase_h
diff --git a/test_conformance/vec_align/test_vec_align.cpp b/test_conformance/vec_align/test_vec_align.cpp
new file mode 100644
index 0000000..bc666a4
--- /dev/null
+++ b/test_conformance/vec_align/test_vec_align.cpp
@@ -0,0 +1,545 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "testBase.h"
+#include "harness/conversions.h"
+#include "harness/typeWrappers.h"
+#include "harness/testHarness.h"
+#include "structs.h"
+#include "defines.h"
+#include "type_replacer.h"
+// Returns the width used for alignment: 4 for a 3-element vector, and the
+// vector size itself for every other input.
+size_t get_align(size_t vecSize)
+{
+    return (vecSize == 3) ? 4 : vecSize;
+}
+/* // Lots of conditionals means this is not gonna be an optimal min on intel. */
+/* // That's okay, make sure we only call a few times per test, not for every */
+/* // element */
+/* size_t min_of_nonzero(size_t a, size_t b) */
+/* { */
+/*     if(a != 0 && (a<=b || b==0)) */
+/*     { */
+/*     return a; */
+/*     } */
+/*     if(b != 0 && (b<a || a==0)) */
+/*     { */
+/*     return b; */
+/*     } */
+/*     return 0; */
+/* } */
+/* size_t get_min_packed_alignment(size_t preSize, size_t typeMultiplePreSize, */
+/*                 size_t postSize, size_t typeMultiplePostSize, */
+/*                 ExplicitType kType, size_t vecSize) */
+/* { */
+/*     size_t pre_min = min_of_nonzero(preSize,  */
+/*                     typeMultiplePreSize* */
+/*                     get_explicit_type_size(kType)); */
+/*     size_t post_min = min_of_nonzero(postSize,  */
+/*                     typeMultiplePostSize* */
+/*                     get_explicit_type_size(kType)); */
+/*     size_t struct_min = min_of_nonzero(pre_min, post_min); */
+/*     size_t result =  min_of_nonzero(struct_min, get_align(vecSize) */
+/*                     *get_explicit_type_size(kType)); */
+/*     return result; */
+/* } */
+// Releases the buffers and the CL state and yields the failure code, so that
+// every error path in test_vec_internal() can bail out in one statement.
+static int failVecTest(bufferStruct * pBuffers, clState * pClState)
+{
+    destroyBufferStruct(pBuffers, pClState);
+    destroyClState(pClState);
+    return -1;
+}
+// Runs one kernel pattern for every element type in types[] and every vector
+// width from table index 1 upward.
+//   pattern   kernel source template; ".PRAGMA.", ".STATE.", ".TYPE." and
+//             ".NUM." are substituted through doReplace()
+//   testName  kernel entry point name, also used as the prefix of log messages
+//   bufSize   size of the input buffer; the output buffer is
+//             sizeof(cl_uint) times larger
+//   preSize, typeMultiplePreSize, postSize, typeMultiplePostSize
+//             combined as (size + typeSize * multiple) into the sizes that
+//             precede and follow the vector
+// When both combined sizes are 0, each value read back must be a multiple of
+// get_align(width) * typeSize. Otherwise each value must be the "pre" size
+// past a multiple of the total size. double is skipped without cl_khr_fp64;
+// long and ulong are skipped when gIsEmbedded is set.
+// Returns 0 if every combination passes, -1 on the first failure.
+int test_vec_internal(cl_device_id deviceID, cl_context context,
+                      cl_command_queue queue, const char * pattern,
+                      const char * testName, size_t bufSize,
+                      size_t preSize, size_t typeMultiplePreSize,
+                      size_t postSize, size_t typeMultiplePostSize)
+{
+    enum { kSourceBufferSize = 2048 };
+    int err;
+    int typeIdx, vecSizeIdx;
+    char tmpBuffer[kSourceBufferSize];
+    char srcBuffer[kSourceBufferSize];
+    size_t preSizeBytes, postSizeBytes, typeSize, totSize;
+    clState * pClState = newClState(deviceID, context, queue);
+    if(pClState == NULL) {
+        vlog_error("%s : Could not create CL state\n", testName);
+        return -1;
+    }
+    bufferStruct * pBuffers =
+        newBufferStruct(bufSize, bufSize*sizeof(cl_uint)/sizeof(cl_char), pClState);
+    if(pBuffers == NULL) {
+        destroyClState(pClState);
+        vlog_error("%s : Could not create buffer\n", testName);
+        return -1;
+    }
+    for(typeIdx = 0; types[typeIdx] != kNumExplicitTypes; ++typeIdx)
+    {
+        const bool isDouble = (types[typeIdx] == kDouble);
+        const bool isLong = (types[typeIdx] == kLong || types[typeIdx] == kULong);
+        // Skip doubles if it is not supported otherwise enable pragma
+        if (isDouble && !is_extension_available(deviceID, "cl_khr_fp64")) {
+            continue;
+        }
+        if (isLong && gIsEmbedded) {
+            continue;
+        }
+        if (isDouble) {
+            doReplace(tmpBuffer, kSourceBufferSize, pattern,
+                      ".PRAGMA.",  "#pragma OPENCL EXTENSION cl_khr_fp64: ",
+                      ".STATE.", "enable");
+        } else {
+            doReplace(tmpBuffer, kSourceBufferSize, pattern,
+                      ".PRAGMA.",  " ",
+                      ".STATE.", " ");
+        }
+        typeSize = get_explicit_type_size(types[typeIdx]);
+        preSizeBytes = preSize + typeSize*typeMultiplePreSize;
+        postSizeBytes = postSize + typeSize*typeMultiplePostSize;
+        // Index 0 (scalar) is deliberately not part of this loop.
+        for(vecSizeIdx = 1; vecSizeIdx < NUM_VECTOR_SIZES; ++vecSizeIdx)  {
+            totSize = preSizeBytes + postSizeBytes +
+                typeSize*get_align(g_arrVecSizes[vecSizeIdx]);
+            doReplace(srcBuffer, kSourceBufferSize, tmpBuffer,
+                      ".TYPE.",  g_arrTypeNames[typeIdx],
+                      ".NUM.", g_arrVecSizeNames[vecSizeIdx]);
+            if(srcBuffer[0] == '\0') {
+                vlog_error("%s: failed to fill source buf for type %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                return failVecTest(pBuffers, pClState);
+            }
+            err = clStateMakeProgram(pClState, srcBuffer, testName );
+            if (err) {
+                vlog_error("%s: Error compiling \"\n%s\n\"",
+                           testName, srcBuffer);
+                return failVecTest(pBuffers, pClState);
+            }
+            err = pushArgs(pBuffers, pClState);
+            if(err != 0) {
+                vlog_error("%s: failed to push args %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                return failVecTest(pBuffers, pClState);
+            }
+            // now we run the kernel
+            err = runKernel(pClState,
+                            bufSize/(g_arrVecSizes[vecSizeIdx]* g_arrTypeSizes[typeIdx]));
+            if(err != 0) {
+                // m_numThreads is a size_t, so it is printed with %zu.
+                vlog_error("%s: runKernel fail (%zu threads) %s%s\n",
+                           testName, pClState->m_numThreads,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                return failVecTest(pBuffers, pClState);
+            }
+            err = retrieveResults(pBuffers, pClState);
+            if(err != 0) {
+                vlog_error("%s: failed to retrieve results %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                return failVecTest(pBuffers, pClState);
+            }
+            if(preSizeBytes+postSizeBytes == 0)
+            {
+                // nothing before or after the vector: plain alignment check
+                err = checkCorrectness(pBuffers, pClState,
+                                       get_align(g_arrVecSizes[vecSizeIdx])*
+                                       typeSize);
+            }
+            else
+            {
+                // we're checking for an aligned struct
+                err = checkPackedCorrectness(pBuffers, pClState, totSize,
+                                             preSizeBytes);
+            }
+            if(err != 0) {
+                vlog_error("%s: incorrect results %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                vlog_error("%s: Source was \"\n%s\n\"",
+                           testName, srcBuffer);
+                return failVecTest(pBuffers, pClState);
+            }
+            // Each type/width pair gets a fresh program and kernel.
+            clStateDestroyProgramAndKernel(pClState);
+        }
+    }
+    destroyBufferStruct(pBuffers, pClState);
+    destroyClState(pClState);
+    return 0;
+}
+const char * patterns[] = { // Kernel source templates; the .NAME. tokens are placeholders substituted with doReplace() before the program is built.
+    ".PRAGMA..STATE.\n" // [0] test_vec_align_array: each work-item stores the address of source[tid]
+    "__kernel void test_vec_align_array(.SRC_SCOPE. .TYPE..NUM. *source, .DST_SCOPE. uint *dest)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)(source+tid));\n"
+    "}\n",
+    ".PRAGMA..STATE.\n" // [1] test_vec_align_struct: stores the address of the vec member of a struct declared in .SRC_SCOPE.
+    "typedef struct myUnpackedStruct { \n"
+    ".PRE." // optional member declared before vec (see pre_substitution_arr)
+    "    .TYPE..NUM. vec;\n"
+    ".POST." // optional member declared after vec (see post_substitution_arr)
+    "} testStruct;\n"
+    "__kernel void test_vec_align_struct(__constant .TYPE..NUM. *source, .DST_SCOPE. uint *dest)\n"
+    "{\n"
+    "    .SRC_SCOPE. testStruct test;\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec));\n"
+    "}\n",
+    ".PRAGMA..STATE.\n" // [2] test_vec_align_packed_struct: stores the byte offset of vec inside a packed struct
+    "typedef struct __attribute__ ((packed)) myPackedStruct { \n"
+    ".PRE."
+    "    .TYPE..NUM. vec;\n"
+    ".POST."
+    "} testStruct;\n"
+    "__kernel void test_vec_align_packed_struct(__constant .TYPE..NUM. *source, .DST_SCOPE. uint *dest)\n"
+    "{\n"
+    "    .SRC_SCOPE. testStruct test;\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec) - (.SRC_SCOPE. uchar *)&test);\n"
+    "}\n",
+    ".PRAGMA..STATE.\n" // [3] test_vec_align_struct_arr: stores the address of source[tid].vec
+    "typedef struct myStruct { \n"
+    ".PRE."
+    "    .TYPE..NUM. vec;\n"
+    ".POST."
+    "} testStruct;\n"
+    "__kernel void test_vec_align_struct_arr(.SRC_SCOPE. testStruct *source, .DST_SCOPE. uint *dest)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec));\n"
+    "}\n",
+    ".PRAGMA..STATE.\n" // [4] test_vec_align_packed_struct_arr: stores the byte distance from source[0] to source[tid].vec
+    "typedef struct __attribute__ ((packed)) myPackedStruct { \n"
+    ".PRE."
+    "    .TYPE..NUM. vec;\n"
+    ".POST."
+    "} testStruct;\n"
+    "__kernel void test_vec_align_packed_struct_arr(.SRC_SCOPE.  testStruct *source, .DST_SCOPE. uint *dest)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec) - (.SRC_SCOPE. uchar *)&(source[0]));\n"
+    "}\n",
+    // __attribute__ ((packed))
+const char * pre_substitution_arr[] = { // Candidate texts for the .PRE. placeholder (member declared before vec); the tests iterate until the NULL entry.
+    "", // no leading member
+    "char c;\n",
+    "short3 s;",
+    ".TYPE.3 tPre;\n", // .TYPE. is substituted later with the element type under test
+    ".TYPE. arrPre[5];\n",
+    ".TYPE. arrPre[12];\n",
+    NULL // end-of-list sentinel
+// alignments of everything in pre_substitution_arr as raw alignments
+// 0 if such a thing is meaningless
+// Entries are index-parallel to pre_substitution_arr (the NULL sentinel has no entry).
+// NOTE(review): the consumer names these values "size bytes"; confirm whether size or alignment is meant.
+size_t pre_align_arr[] = {
+    0, // ""
+    sizeof(cl_char), // "char c;"
+    4*sizeof(cl_short), // "short3 s;" - counted as four shorts
+    0, // taken care of in type_multiple_pre_align_arr
+    0, // ".TYPE. arrPre[5];" - see type_multiple_pre_align_arr
+    0 // ".TYPE. arrPre[12];" - see type_multiple_pre_align_arr
+// alignments of everything in pre_substitution_arr as multiples of
+// sizeof(.TYPE.)
+// 0 if such a thing is meaningless
+// Entries are index-parallel to pre_substitution_arr.
+size_t type_multiple_pre_align_arr[] = {
+    0, // ""
+    0, // "char c;" - fixed size, see pre_align_arr
+    0, // "short3 s;" - fixed size, see pre_align_arr
+    4, // ".TYPE.3 tPre;" - a 3-vector counted as four elements
+    5, // ".TYPE. arrPre[5];"
+    12 // ".TYPE. arrPre[12];"
+const char * post_substitution_arr[] = { // Candidate texts for the .POST. placeholder (member declared after vec); the tests iterate until the NULL entry.
+    "", // no trailing member
+    "char cPost;\n",
+    ".TYPE. arrPost[3];\n", // .TYPE. is substituted later with the element type under test
+    ".TYPE. arrPost[5];\n",
+    ".TYPE.3 arrPost;\n",
+    ".TYPE. arrPost[12];\n",
+    NULL // end-of-list sentinel
+// alignments of everything in post_substitution_arr as raw alignments
+// 0 if such a thing is meaningless
+// Entries are index-parallel to post_substitution_arr (the NULL sentinel has no entry).
+size_t post_align_arr[] = {
+    0, // ""
+    sizeof(cl_char), // "char cPost;"
+    0, // taken care of in type_multiple_post_align_arr
+    0, // ".TYPE. arrPost[5];" - see type_multiple_post_align_arr
+    0, // ".TYPE.3 arrPost;" - see type_multiple_post_align_arr
+    0 // ".TYPE. arrPost[12];" - see type_multiple_post_align_arr
+// alignments of everything in post_substitution_arr as multiples of
+// sizeof(.TYPE.)
+// 0 if such a thing is meaningless
+// Entries are index-parallel to post_substitution_arr.
+size_t type_multiple_post_align_arr[] = {
+    0, // ""
+    0, // "char cPost;" - fixed size, see post_align_arr
+    3, // ".TYPE. arrPost[3];"
+    5, // ".TYPE. arrPost[5];"
+    4, // ".TYPE.3 arrPost;" - a 3-vector counted as four elements
+    12 // ".TYPE. arrPost[12];"
+// TODO: should there be a packed version of this test?
+int test_vec_align_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) // Runs the patterns[0] kernel on a __global vector array; num_elements is not used in this body.
+    char tmp[2048]; // kernel template after the address-space substitution
+    int result;
+    log_info("Testing global\n");
+    doReplace(tmp, (size_t)2048, patterns[0], // return value is not checked
+              ".SRC_SCOPE.",  "__global",
+              ".DST_SCOPE.", "__global"); //
+    result = test_vec_internal(deviceID, context, queue, tmp,
+                               "test_vec_align_array",
+                               BUFFER_SIZE, 0, 0, 0, 0); // the four zeros occupy the pre/post struct-member argument slots used by the packed tests
+    return result; // whatever test_vec_internal reported
+int test_vec_align_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) // Runs the patterns[1] kernel for every .PRE./.POST. member combination, first in __private then in __local; num_elements is not used in this body.
+    char tmp1[2048], tmp2[2048]; // tmp2: template with scopes filled in; tmp1: tmp2 with .PRE./.POST. filled in
+    int result = 0;
+    int preIdx, postIdx;
+    log_info("testing __private\n");
+    doReplace(tmp2, (size_t)2048, patterns[1],
+              ".SRC_SCOPE.",  "__private",
+              ".DST_SCOPE.", "__global"); //
+    for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) { // both tables end with a NULL sentinel
+        for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) {
+            doReplace(tmp1, (size_t)2048, tmp2,
+                      ".PRE.",  pre_substitution_arr[preIdx],
+                      ".POST.",  post_substitution_arr[postIdx]);
+            result = test_vec_internal(deviceID, context, queue, tmp1,
+                                       "test_vec_align_struct",
+                                       512, 0, 0, 0, 0); // buffer size 512 instead of BUFFER_SIZE; member-size slots are zero for the unpacked struct
+            if (result != 0) {
+                return result; // stop at the first failing combination
+            }
+        }
+    }
+    log_info("testing __local\n");
+    doReplace(tmp2, (size_t)2048, patterns[1], // same sweep again with the struct in __local
+              ".SRC_SCOPE.",  "__local",
+              ".DST_SCOPE.", "__global"); //
+    for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) {
+        for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) {
+            doReplace(tmp1, (size_t)2048, tmp2,
+                      ".PRE.",  pre_substitution_arr[preIdx],
+                      ".POST.",  post_substitution_arr[postIdx]);
+            result = test_vec_internal(deviceID, context, queue, tmp1,
+                                       "test_vec_align_struct",
+                                       512, 0, 0, 0, 0);
+            if(result != 0) {
+                return result;
+            }
+        }
+    }
+    return 0; // every combination passed
+int test_vec_align_packed_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) // Runs the patterns[2] (packed struct) kernel for every .PRE./.POST. member combination, in __private then __local; num_elements is not used in this body.
+    char tmp1[2048], tmp2[2048]; // tmp2: template with scopes filled in; tmp1: tmp2 with .PRE./.POST. filled in
+    int result = 0;
+    int preIdx, postIdx;
+    log_info("Testing __private\n");
+    doReplace(tmp2, (size_t)2048, patterns[2],
+              ".SRC_SCOPE.",  "__private",
+              ".DST_SCOPE.", "__global"); //
+    for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) { // both tables end with a NULL sentinel
+        for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) {
+            doReplace(tmp1, (size_t)2048, tmp2,
+                      ".PRE.",  pre_substitution_arr[preIdx],
+                      ".POST.",  post_substitution_arr[postIdx]);
+            result = test_vec_internal(deviceID, context, queue, tmp1,
+                                       "test_vec_align_packed_struct",
+                                       512, pre_align_arr[preIdx], // table values describing the chosen .PRE. member
+                                       type_multiple_pre_align_arr[preIdx],
+                                       post_align_arr[postIdx], // table values describing the chosen .POST. member
+                                       type_multiple_post_align_arr[postIdx]);
+            if(result != 0) {
+                return result; // stop at the first failing combination
+            }
+        }
+    }
+    log_info("testing __local\n");
+    doReplace(tmp2, (size_t)2048, patterns[2], // same sweep again with the struct in __local
+              ".SRC_SCOPE.",  "__local",
+              ".DST_SCOPE.", "__global"); //
+    for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) {
+        for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) {
+            doReplace(tmp1, (size_t)2048, tmp2,
+                      ".PRE.",  pre_substitution_arr[preIdx],
+                      ".POST.",  post_substitution_arr[postIdx]);
+            result = test_vec_internal(deviceID, context, queue, tmp1,
+                                       "test_vec_align_packed_struct",
+                                       512, pre_align_arr[preIdx],
+                                       type_multiple_pre_align_arr[preIdx],
+                                       post_align_arr[postIdx],
+                                       type_multiple_post_align_arr[postIdx]);
+            if (result != 0) {
+                return result;
+            }
+        }
+    }
+    return 0; // every combination passed
+int test_vec_align_struct_arr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) // Runs the patterns[3] kernel (array of unpacked structs in __global) for every .PRE./.POST. member combination; num_elements is not used in this body.
+    char tmp1[2048], tmp2[2048]; // tmp2: template with scopes filled in; tmp1: tmp2 with .PRE./.POST. filled in
+    int result = 0;
+    int preIdx, postIdx;
+    log_info("testing __global\n");
+    doReplace(tmp2, (size_t)2048, patterns[3],
+              ".SRC_SCOPE.",  "__global",
+              ".DST_SCOPE.", "__global"); //
+    for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) { // both tables end with a NULL sentinel
+        for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) {
+            doReplace(tmp1, (size_t)2048, tmp2,
+                      ".PRE.",  pre_substitution_arr[preIdx],
+                      ".POST.",  post_substitution_arr[postIdx]);
+            result = test_vec_internal(deviceID, context, queue, tmp1,
+                                       "test_vec_align_struct_arr",
+                                       BUFFER_SIZE, 0, 0, 0, 0); // member-size slots are zero for the unpacked struct
+            if(result != 0) {
+                return result; // stop at the first failing combination
+            }
+        }
+    }
+    return 0; // every combination passed
+int test_vec_align_packed_struct_arr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) // Runs the patterns[4] kernel (array of packed structs in __global) for every .PRE./.POST. member combination; num_elements is not used in this body.
+    char tmp1[2048], tmp2[2048]; // tmp2: template with scopes filled in; tmp1: tmp2 with .PRE./.POST. filled in
+    int result = 0;
+    int preIdx, postIdx;
+    log_info("Testing __global\n");
+    doReplace(tmp2, (size_t)2048, patterns[4],
+              ".SRC_SCOPE.",  "__global",
+              ".DST_SCOPE.", "__global"); //
+    for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) { // both tables end with a NULL sentinel
+        for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) {
+            doReplace(tmp1, (size_t)2048, tmp2,
+                      ".PRE.",  pre_substitution_arr[preIdx],
+                      ".POST.",  post_substitution_arr[postIdx]);
+            result = test_vec_internal(deviceID, context, queue, tmp1,
+                                       "test_vec_align_packed_struct_arr",
+                                       BUFFER_SIZE, pre_align_arr[preIdx], // table values describing the chosen .PRE. member
+                                       type_multiple_pre_align_arr[preIdx],
+                                       post_align_arr[postIdx], // table values describing the chosen .POST. member
+                                       type_multiple_post_align_arr[postIdx]);
+            if(result != 0)
+                return result; // stop at the first failing combination
+        }
+    }
+    return 0; // every combination passed
diff --git a/test_conformance/vec_align/type_replacer.cpp b/test_conformance/vec_align/type_replacer.cpp
new file mode 100644
index 0000000..74967b2
--- /dev/null
+++ b/test_conformance/vec_align/type_replacer.cpp
@@ -0,0 +1,115 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <string.h>
+#if !defined(_MSC_VER)
+#include <stdint.h>
+#endif // !_MSC_VER
+// Copies `source` into `dest`, substituting every occurrence of
+// `stringToReplace1` with `replaceWith1` and every occurrence of
+// `stringToReplace2` with `replaceWith2`.  When both search strings match at
+// the same position the second one is applied.  An empty search string never
+// matches (it would otherwise match forever without consuming input).
+//
+// dest / destLength: output buffer and its capacity in bytes, including the
+// terminating NUL.
+//
+// Returns the number of source characters copied verbatim (replacement text
+// is not counted).  If the result, NUL included, does not fit in destLength
+// bytes, dest is set to the empty string and (size_t)-1, (size_t)-2 or
+// (size_t)-3 is returned, depending on whether the overflow was detected at
+// an occurrence of pattern 1, of pattern 2, or in the trailing text.
+size_t doReplace(char * dest, size_t destLength, const char * source,
+          const char * stringToReplace1,  const char * replaceWith1,
+          const char * stringToReplace2, const char * replaceWith2)
+{
+    size_t copyCount = 0; // source bytes copied verbatim; this is the return value
+    size_t written = 0;   // bytes stored in dest so far, NUL not included
+    const char * sourcePtr = source;
+    const size_t len1 = strlen(stringToReplace1);
+    const size_t len2 = strlen(stringToReplace2);
+    const size_t lenReplace1 = strlen(replaceWith1);
+    const size_t lenReplace2 = strlen(replaceWith2);
+    if(destLength == 0) { return (size_t)-3; } // not even room for the NUL
+    while(*sourcePtr)
+    {
+        const char * ptr1 = len1 ? strstr(sourcePtr, stringToReplace1) : NULL;
+        const char * ptr2 = len2 ? strstr(sourcePtr, stringToReplace2) : NULL;
+        const char * replacement = "";
+        size_t lenReplace = 0; // length of the text inserted at the match
+        size_t lenSkip = 0;    // length of the search string consumed
+        size_t nJump;          // literal source bytes preceding the match
+        size_t failCode;
+        // Invariant: written < destLength, so this cannot wrap.
+        const size_t room = destLength - written - 1;
+        if(ptr1 != NULL && (ptr2 == NULL || ptr2 > ptr1))
+        {
+            nJump = (size_t)(ptr1 - sourcePtr);
+            replacement = replaceWith1;
+            lenReplace = lenReplace1;
+            lenSkip = len1;
+            failCode = (size_t)-1;
+        }
+        else if(ptr2 != NULL)
+        {
+            // Reached when pattern 2 comes first or both start at the same spot.
+            nJump = (size_t)(ptr2 - sourcePtr);
+            replacement = replaceWith2;
+            lenReplace = lenReplace2;
+            lenSkip = len2;
+            failCode = (size_t)-2;
+        }
+        else
+        {
+            // No further match: the rest of the source is copied as-is.
+            nJump = strlen(sourcePtr);
+            failCode = (size_t)-3;
+        }
+        // The literal run AND the replacement must both fit; the original
+        // check ignored the replacement and could write past the buffer.
+        if(nJump > room || lenReplace > room - nJump)
+        {
+            dest[0] = '\0'; // callers detect failure through an empty result
+            return failCode;
+        }
+        memcpy(dest + written, sourcePtr, nJump);
+        written += nJump;
+        memcpy(dest + written, replacement, lenReplace);
+        written += lenReplace;
+        copyCount += nJump;
+        sourcePtr += nJump + lenSkip;
+    }
+    dest[written] = '\0';
+    return copyCount;
+}
+// Copies `source` into `dest`, substituting every occurrence of
+// `stringToReplace` with `replaceWith`.  An empty search string never matches
+// (it would otherwise match forever without consuming input).
+//
+// dest / destLength: output buffer and its capacity in bytes, including the
+// terminating NUL.
+//
+// Returns the number of source characters copied verbatim (replacement text
+// is not counted).  If the result, NUL included, does not fit in destLength
+// bytes, dest is set to the empty string and (size_t)-1 (overflow detected at
+// a match) or (size_t)-3 (overflow in the trailing text) is returned.
+size_t doSingleReplace(char * dest, size_t destLength, const char * source,
+               const char * stringToReplace, const char * replaceWith)
+{
+    size_t copyCount = 0; // source bytes copied verbatim; this is the return value
+    size_t written = 0;   // bytes stored in dest so far, NUL not included
+    const char * sourcePtr = source;
+    const size_t len = strlen(stringToReplace);
+    const size_t lenReplace = strlen(replaceWith);
+    if(destLength == 0) { return (size_t)-3; } // not even room for the NUL
+    while(*sourcePtr)
+    {
+        const char * ptr = len ? strstr(sourcePtr, stringToReplace) : NULL;
+        // Invariant: written < destLength, so this cannot wrap.
+        const size_t room = destLength - written - 1;
+        size_t nJump;     // literal source bytes preceding the match
+        size_t lenInsert; // replacement bytes to append after them
+        size_t lenSkip;   // search-string bytes consumed from the source
+        size_t failCode;
+        if(ptr != NULL)
+        {
+            nJump = (size_t)(ptr - sourcePtr);
+            lenInsert = lenReplace;
+            lenSkip = len;
+            failCode = (size_t)-1;
+        }
+        else
+        {
+            // No further match: the rest of the source is copied as-is.
+            nJump = strlen(sourcePtr);
+            lenInsert = 0;
+            lenSkip = 0;
+            failCode = (size_t)-3;
+        }
+        // The literal run AND the replacement must both fit; the original
+        // check ignored the replacement and could write past the buffer.
+        if(nJump > room || lenInsert > room - nJump)
+        {
+            dest[0] = '\0'; // leave a well-formed empty string on failure
+            return failCode;
+        }
+        memcpy(dest + written, sourcePtr, nJump);
+        written += nJump;
+        memcpy(dest + written, replaceWith, lenInsert);
+        written += lenInsert;
+        copyCount += nJump;
+        sourcePtr += nJump + lenSkip;
+    }
+    dest[written] = '\0';
+    return copyCount;
+}
diff --git a/test_conformance/vec_align/type_replacer.h b/test_conformance/vec_align/type_replacer.h
new file mode 100644
index 0000000..f50b08d
--- /dev/null
+++ b/test_conformance/vec_align/type_replacer.h
@@ -0,0 +1,23 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <stdlib.h>
+size_t doReplace(char * dest, size_t destLength, const char * source, // Writes source into dest (capacity destLength) with two search strings substituted.
+          const char * stringToReplace1,  const char * replaceWith1, // first search string and its replacement
+          const char * stringToReplace2, const char * replaceWith2); // second search string and its replacement
+size_t doSingleReplace(char * dest, size_t destLength, const char * source, // Same as doReplace but with a single search string.
+               const char * stringToReplace, const char * replaceWith);
diff --git a/test_conformance/vec_step/CMakeLists.txt b/test_conformance/vec_step/CMakeLists.txt
new file mode 100644
index 0000000..c18fc1b
--- /dev/null
+++ b/test_conformance/vec_step/CMakeLists.txt
@@ -0,0 +1,11 @@
+    globals.cpp
+        test_step.cpp
+        main.cpp
+        structs.cpp
+        type_replacer.cpp
diff --git a/test_conformance/vec_step/defines.h b/test_conformance/vec_step/defines.h
new file mode 100644
index 0000000..5f364e4
--- /dev/null
+++ b/test_conformance/vec_step/defines.h
@@ -0,0 +1,41 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "harness/errorHelpers.h"
+#include "harness/kernelHelpers.h"
+#include "harness/threadTesting.h"
+#include "harness/typeWrappers.h"
+#include "harness/conversions.h"
+#include "harness/mt19937.h"
+// 1,2,3,4,8,16 or
+// 1,2,4,8,16,3
+extern int g_arrVecSizes[NUM_VECTOR_SIZES];
+extern int g_arrVecSteps[NUM_VECTOR_SIZES];
+extern bool g_wimpyMode;
+extern const char * g_arrVecSizeNames[NUM_VECTOR_SIZES];
+// Define the buffer size that we want to block our test with
+#define BUFFER_SIZE (1024*1024)
+#define KPAGESIZE 4096
+extern ExplicitType types[];
+extern const char *g_arrTypeNames[];
+extern const size_t g_arrTypeSizes[];
diff --git a/test_conformance/vec_step/globals.cpp b/test_conformance/vec_step/globals.cpp
new file mode 100644
index 0000000..029ae1c
--- /dev/null
+++ b/test_conformance/vec_step/globals.cpp
@@ -0,0 +1,52 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "defines.h"
+// 1,2,3,4,8,16 or
+// 1,2,4,8,16,3
+int g_arrVecSizes[NUM_VECTOR_SIZES] = {1,2,3,4,8,16}; // element count of each vector width under test
+int g_arrVecSteps[NUM_VECTOR_SIZES] = {1,2,4,4,8,16}; // index-parallel to g_arrVecSizes; identical except that size 3 maps to 4
+const char * g_arrVecSizeNames[NUM_VECTOR_SIZES] = {"", "2","3","4","8","16"}; // type-name suffix for each width; empty for the scalar case
+bool g_wimpyMode = false; // read by initContents to select the alternating-sign 32-bit fill
+ExplicitType types[] = { kChar, kUChar, // element types under test; the list ends with kNumExplicitTypes
+             kShort, kUShort,
+             kInt, kUInt,
+             kLong, kULong,
+             kFloat, kDouble,
+             kNumExplicitTypes };
+const char *g_arrTypeNames[] = // type-name spellings, in the same order as types[]
+    {
+    "char",  "uchar",
+    "short", "ushort",
+    "int",   "uint",
+    "long",  "ulong",
+    "float", "double"
+    };
+extern const size_t g_arrTypeSizes[] = // element sizes in bytes, in the same order as g_arrTypeNames
+    {
+    1, 1,
+    2, 2,
+    4, 4,
+    8, 8,
+    4, 8
+    };
diff --git a/test_conformance/vec_step/main.cpp b/test_conformance/vec_step/main.cpp
new file mode 100644
index 0000000..9742a28
--- /dev/null
+++ b/test_conformance/vec_step/main.cpp
@@ -0,0 +1,40 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "harness/compat.h"
+#include <stdio.h>
+#include <string.h>
+#include "procs.h"
+#include "harness/testHarness.h"
+#if !defined(_WIN32)
+#include <unistd.h>
+test_definition test_list[] = { // tests registered for this executable; passed to runTestHarness from main
+    ADD_TEST( step_type ),
+    ADD_TEST( step_var ),
+    ADD_TEST( step_typedef_type ),
+    ADD_TEST( step_typedef_var ),
+const int test_num = ARRAY_SIZE( test_list ); // number of entries in test_list
+int main(int argc, const char *argv[]) // Entry point: forwards the command line and the test table to the shared harness.
+    return runTestHarness( argc, argv, test_num, test_list, false, false, 0 ); // the harness result becomes the process exit code
diff --git a/test_conformance/vec_step/procs.h b/test_conformance/vec_step/procs.h
new file mode 100644
index 0000000..382a36b
--- /dev/null
+++ b/test_conformance/vec_step/procs.h
@@ -0,0 +1,43 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "harness/errorHelpers.h"
+#include "harness/kernelHelpers.h"
+#include "harness/threadTesting.h"
+#include "harness/typeWrappers.h"
+#include "harness/conversions.h"
+#include "harness/mt19937.h"
+// The number of errors to print out for each test in the shuffle tests
+extern int      create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret);
+    test_step_type,
+    test_step_var,
+    test_step_typedef_type,
+    test_step_typedef_var,
+extern int test_step_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_step_var(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_step_typedef_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_step_typedef_var(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
diff --git a/test_conformance/vec_step/structs.cpp b/test_conformance/vec_step/structs.cpp
new file mode 100644
index 0000000..b36e892
--- /dev/null
+++ b/test_conformance/vec_step/structs.cpp
@@ -0,0 +1,285 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "structs.h"
+#include "defines.h"
+/** typedef struct _bufferStruct
+ {
+ void * m_pIn;
+ void * m_pOut;
+ cl_mem m_outBuffer;
+ cl_mem m_inBuffer;
+ size_t m_bufSize;
+ } bufferStruct;
+ */
+// Allocates a clState and stores the given device, context and queue handles
+// in it as-is; the program and kernel slots start out empty.
+// Returns the new state, or NULL (after logging) if the allocation fails.
+// Free the result with destroyClState.
+clState * newClState(cl_device_id device, cl_context context, cl_command_queue queue)
+{
+    clState * pResult = (clState *)malloc(sizeof(clState));
+    if(pResult == NULL)
+    {
+        // Dereferencing a failed allocation would crash the test binary.
+        log_error("newClState: out of memory\n");
+        return NULL;
+    }
+    pResult->m_device = device;
+    pResult->m_context = context;
+    pResult->m_queue = queue;
+    pResult->m_kernel = NULL; pResult->m_program = NULL;
+    pResult->m_numThreads = 0; // set by runKernel; start from a defined value
+    return pResult;
+}
+clState * destroyClState(clState * pState) // Releases the program/kernel held by pState, frees pState and returns NULL; pState is dereferenced, so it must not be NULL.
+    clStateDestroyProgramAndKernel(pState);
+    free(pState); // the device, context and queue handles are not released here
+    return NULL; // lets callers write p = destroyClState(p)
+int clStateMakeProgram(clState * pState, const char * prog, // Creates a program and kernel from the single source string prog and stores them in pState.
+                       const char * kernelName) // name of the kernel function to look up in prog
+    const char * srcArr[1] = {NULL}; // the helper takes an array of source strings
+    srcArr[0] = prog;
+    int err = create_single_kernel_helper(pState->m_context,
+                                          &(pState->m_program),
+                                          &(pState->m_kernel),
+                                          1, srcArr, kernelName );
+    return err; // the helper's result, passed through unchanged
+int runKernel(clState * pState, size_t numThreads) { // Enqueues pState->m_kernel as a 1-D range of numThreads work-items; returns 0 on success, -1 on failure.
+    int err;
+    pState->m_numThreads = numThreads; // remembered for later use, e.g. by checkCorrectness
+    err = clEnqueueNDRangeKernel(pState->m_queue, pState->m_kernel,
+                                 1, NULL, &(pState->m_numThreads),
+                                 NULL, 0, NULL, NULL); // no local size, no wait list, no event
+    if(err != CL_SUCCESS)
+    {
+        log_error("clEnqueueNDRangeKernel returned %d (%x)\n",
+                  err, err);
+        return -1;
+    }
+    return 0; // the kernel was enqueued; this function does not wait for it
+void clStateDestroyProgramAndKernel(clState * pState) // Releases the kernel and program held by pState, if any, and clears both slots; pState must not be NULL.
+    if(pState->m_kernel != NULL) {
+        clReleaseKernel( pState->m_kernel );
+        pState->m_kernel = NULL; // makes a second call a no-op
+    }
+    if(pState->m_program != NULL) {
+        clReleaseProgram( pState->m_program );
+        pState->m_program = NULL;
+    }
+// Allocates a bufferStruct holding host staging memory of inSize / outSize
+// bytes plus matching device buffers (read-only input, write-only output)
+// created in pClState->m_context.
+// Returns the new struct, or NULL (after logging) if any allocation fails; on
+// failure everything acquired so far is released through destroyBufferStruct.
+bufferStruct * newBufferStruct(size_t inSize, size_t outSize, clState * pClState) {
+    int error;
+    bufferStruct * pResult = (bufferStruct *)malloc(sizeof(bufferStruct));
+    if(pResult == NULL)
+    {
+        vlog_error( "malloc failed for bufferStruct\n" );
+        return NULL;
+    }
+    // Put every member into a known state first: destroyBufferStruct inspects
+    // all four pointers, so none may be left uninitialized on the early-exit
+    // paths below.
+    pResult->m_bufSizeIn = inSize;
+    pResult->m_bufSizeOut = outSize;
+    pResult->m_inBuffer = NULL;
+    pResult->m_outBuffer = NULL;
+    pResult->m_pIn = malloc(inSize);
+    pResult->m_pOut = malloc(outSize);
+    if( pResult->m_pIn == NULL || pResult->m_pOut == NULL )
+    {
+        // A zero-size request may also land here; clCreateBuffer rejects a
+        // size of 0 anyway, so such a request could never succeed.
+        vlog_error( "malloc failed for host buffers\n" );
+        return destroyBufferStruct(pResult, pClState);
+    }
+    pResult->m_inBuffer = clCreateBuffer(pClState->m_context, CL_MEM_READ_ONLY,
+                                         inSize, NULL, &error);
+    if( pResult->m_inBuffer == NULL )
+    {
+        vlog_error( "clCreateArray failed for input (%d)\n", error );
+        return destroyBufferStruct(pResult, pClState);
+    }
+    pResult->m_outBuffer = clCreateBuffer( pClState->m_context,
+                                          CL_MEM_WRITE_ONLY,
+                                          outSize,
+                                          NULL,
+                                          &error );
+    if( pResult->m_outBuffer == NULL )
+    {
+        vlog_error( "clCreateArray failed for output (%d)\n", error );
+        return destroyBufferStruct(pResult, pClState);
+    }
+    return pResult;
+}
+bufferStruct * destroyBufferStruct(bufferStruct * destroyMe, clState * pClState) { // Releases both device buffers and both host buffers, frees destroyMe and returns NULL; a NULL destroyMe is accepted. pClState is not used in this body.
+    if(destroyMe)
+    {
+        if(destroyMe->m_outBuffer != NULL) {
+            clReleaseMemObject(destroyMe->m_outBuffer);
+            destroyMe->m_outBuffer = NULL;
+        }
+        if(destroyMe->m_inBuffer != NULL) {
+            clReleaseMemObject(destroyMe->m_inBuffer);
+            destroyMe->m_inBuffer = NULL;
+        }
+        if(destroyMe->m_pIn != NULL) {
+            free(destroyMe->m_pIn);
+            destroyMe->m_pIn = NULL;
+        }
+        if(destroyMe->m_pOut != NULL) {
+            free(destroyMe->m_pOut);
+            destroyMe->m_pOut = NULL;
+        }
+        free((void *)destroyMe);
+        destroyMe = NULL; // only the local copy; the caller's pointer is reset through the return value
+    }
+    return destroyMe; // always NULL at this point
+void initContents(bufferStruct * pBufferStruct, clState * pClState, // Fills the host input buffer m_pIn with countIn counter values of width typeSize bytes; pClState is not used in this body.
+                  size_t typeSize, // element width in bytes: 1, 2, 4 or 8
+                  size_t countIn, size_t countOut ) // countIn: elements written to m_pIn; countOut is not used in this body. NOTE(review): the prototype visible in structs.h lists (typeSize, vecWidth) instead - confirm which is intended.
+    size_t i;
+    uint64_t start = 0; // running counter, truncated to the element width on store
+    switch(typeSize)
+    {
+        case 1: {
+            uint8_t* ub = (uint8_t *)(pBufferStruct->m_pIn);
+            for (i=0; i < countIn; ++i)
+            {
+                ub[i] = (uint8_t)start++; // wraps modulo 256
+            }
+            break;
+        }
+        case 2: {
+            uint16_t* us = (uint16_t *)(pBufferStruct->m_pIn);
+            for (i=0; i < countIn; ++i)
+            {
+                us[i] = (uint16_t)start++; // wraps modulo 65536
+            }
+            break;
+        }
+        case 4: {
+            if (!g_wimpyMode) {
+                uint32_t* ui = (uint32_t *)(pBufferStruct->m_pIn);
+                for (i=0; i < countIn; ++i) {
+                    ui[i] = (uint32_t)start++;
+                }
+            }
+            else {
+                // The short test doesn't iterate over the entire 32 bit space so
+                // we alternate between positive and negative values
+                int32_t* ui = (int32_t *)(pBufferStruct->m_pIn);
+                int32_t sign = 1;
+                for (i=0; i < countIn; ++i, ++start) {
+                    ui[i] = (int32_t)start*sign; // 0, -1, 2, -3, ...
+                    sign = sign * -1;
+                }
+            }
+            break;
+        }
+        case 8: {
+            // We don't iterate over the entire space of 64 bit so for the
+            // selects, we want to test positive and negative values
+            int64_t* ll = (int64_t *)(pBufferStruct->m_pIn);
+            int64_t sign = 1;
+            for (i=0; i < countIn; ++i, ++start) {
+                ll[i] = start*sign; // 0, -1, 2, -3, ... (the product is formed in uint64_t, then converted)
+                sign = sign * -1;
+            }
+            break;
+        }
+        default: {
+            log_error("invalid type size %x\n", (int)typeSize); // the buffer is left untouched
+        }
+    }
+    // pBufferStruct->m_bufSizeIn
+    // pBufferStruct->m_bufSizeOut
+int pushArgs(bufferStruct * pBufferStruct, clState * pClState) // Uploads the host input data to the device and binds both buffers as kernel arguments 0 and 1; returns 0 on success, -1 on any failure.
+    int err;
+    err = clEnqueueWriteBuffer(pClState->m_queue, pBufferStruct->m_inBuffer,
+                               CL_TRUE, 0, pBufferStruct->m_bufSizeIn, // CL_TRUE: blocking write of the whole input buffer
+                               pBufferStruct->m_pIn, 0, NULL, NULL);
+    if(err != CL_SUCCESS)
+    {
+        log_error("clEnqueueWriteBuffer failed\n");
+        return -1;
+    }
+    err = clSetKernelArg(pClState->m_kernel, 0, // argument 0: the input buffer
+                         sizeof(pBufferStruct->m_inBuffer), // pBufferStruct->m_bufSizeIn,
+                         &(pBufferStruct->m_inBuffer));
+    if(err != CL_SUCCESS)
+    {
+        log_error("clSetKernelArgs failed, first arg (0)\n");
+        return -1;
+    }
+    err = clSetKernelArg(pClState->m_kernel, 1, // argument 1: the output buffer
+                         sizeof(pBufferStruct->m_outBuffer), // pBufferStruct->m_bufSizeOut,
+                         &(pBufferStruct->m_outBuffer));
+    if(err != CL_SUCCESS)
+    {
+        log_error("clSetKernelArgs failed, second arg (1)\n");
+        return -1;
+    }
+    return 0;
+int retrieveResults(bufferStruct * pBufferStruct, clState * pClState) // Reads the device output buffer back into the host buffer m_pOut; returns 0 on success, -1 on failure.
+    int err;
+    err = clEnqueueReadBuffer(pClState->m_queue, pBufferStruct->m_outBuffer,
+                              CL_TRUE, 0, pBufferStruct->m_bufSizeOut, // CL_TRUE: blocking read of the whole output buffer
+                              pBufferStruct->m_pOut, 0, NULL, NULL);
+    if(err != CL_SUCCESS)
+    {
+        log_error("clEnqueueReadBuffer failed\n");
+        return -1;
+    }
+    return 0;
+// Verifies the kernel output: each of the first pClState->m_numThreads
+// cl_int values in pBufferStruct->m_pOut must equal vecWidth, where a width
+// of 3 is expected to be reported as 4.
+// typeSize is not used in this body.
+// Returns 0 if every value matches; logs the first mismatch and returns -1
+// otherwise.
+int checkCorrectness(bufferStruct * pBufferStruct, clState * pClState,
+                     size_t typeSize,
+                     size_t vecWidth)
+{
+    size_t i;
+    cl_int targetSize = (cl_int) vecWidth;
+    cl_int * targetArr = (cl_int *)(pBufferStruct->m_pOut);
+    if(targetSize == 3)
+    {
+        targetSize = 4; // hack for 4-aligned vec3 types
+    }
+    for(i = 0; i < pClState->m_numThreads; ++i)
+    {
+        if(targetArr[i] != targetSize)
+        {
+            // i and m_numThreads are size_t: %zu is the matching conversion
+            // (%ld is wrong wherever long is narrower than size_t).
+            vlog_error("Error %zu (of %zu).  Expected %d, got %d\n",
+                       i, pClState->m_numThreads,
+                       targetSize, targetArr[i]);
+            return -1;
+        }
+    }
+    return 0;
+}
diff --git a/test_conformance/vec_step/structs.h b/test_conformance/vec_step/structs.h
new file mode 100644
index 0000000..37e5524
--- /dev/null
+++ b/test_conformance/vec_step/structs.h
@@ -0,0 +1,67 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "testBase.h"
+#include "harness/conversions.h"
+#include "harness/typeWrappers.h"
+typedef struct _clState // OpenCL handles shared by the vec_step test helpers.
+    cl_device_id m_device;
+    cl_context m_context;
+    cl_command_queue m_queue; // queue on which retrieveResults() issues its blocking read
+    cl_program m_program;
+    cl_kernel m_kernel; // kernel whose arguments 0 and 1 are bound to the bufferStruct cl_mem objects
+    size_t m_numThreads; // number of results checkCorrectness() inspects
+} clState;
+clState * newClState(cl_device_id device, cl_context context, cl_command_queue queue); // Creates the state object used by a test; NOTE(review): defined in structs.cpp, outside this excerpt.
+clState * destroyClState(clState * pState); // Tears down pState; test_step.cpp ignores the returned pointer.
+int clStateMakeProgram(clState * pState, const char * prog, // Builds source `prog` and its kernel `kernelName`;
+               const char * kernelName); // callers treat any non-zero return as a build failure.
+void clStateDestroyProgramAndKernel(clState * pState); // NOTE(review): presumably releases m_program/m_kernel - confirm in structs.cpp.
+int runKernel(clState * pState, size_t numThreads); // Runs the current kernel; callers treat non-zero as failure. NOTE(review): numThreads looks like the global work size - confirm.
+typedef struct _bufferStruct // Host pointers and cl_mem objects used as the kernel's input/output.
+    void * m_pIn; // NOTE(review): presumably host-side input data - its use is outside this excerpt.
+    void * m_pOut; // host destination of retrieveResults(); checkCorrectness() reads it as cl_int[]
+    cl_mem m_outBuffer; // bound as kernel argument 1 and read back by retrieveResults()
+    cl_mem m_inBuffer; // bound as kernel argument 0
+    size_t m_bufSizeIn, m_bufSizeOut; // m_bufSizeOut is the byte count retrieveResults() reads into m_pOut
+} bufferStruct;
+bufferStruct * newBufferStruct(size_t inSize, size_t outSize, clState * pClState); // Returns NULL on failure (test_step.cpp checks for it).
+bufferStruct * destroyBufferStruct(bufferStruct * destroyMe, clState * pClState); // Releases destroyMe; test_step.cpp ignores the returned pointer.
+void initContents(bufferStruct * pBufferStruct, clState * pClState, // NOTE(review): not called from test_step.cpp in this excerpt;
+             size_t typeSize, // presumably the element size in bytes - confirm in structs.cpp.
+             size_t vecWidth);
+int pushArgs(bufferStruct * pBufferStruct, clState * pClState); // Callers treat any non-zero return as failure.
+int retrieveResults(bufferStruct * pBufferStruct, clState * pClState); // Blocking read of m_outBuffer into m_pOut; 0 on success, -1 on failure.
+int checkCorrectness(bufferStruct * pBufferStruct, clState * pClState, // Compares each cl_int result in m_pOut with vecWidth (3 is checked as 4);
+             size_t typeSize, // 0 when all m_numThreads results match, -1 otherwise.
+             size_t vecWidth);
diff --git a/test_conformance/vec_step/testBase.h b/test_conformance/vec_step/testBase.h
new file mode 100644
index 0000000..bd72e84
--- /dev/null
+++ b/test_conformance/vec_step/testBase.h
@@ -0,0 +1,28 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef _testBase_h
+#define _testBase_h
+#include "harness/compat.h"
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "procs.h"
+#endif // _testBase_h
diff --git a/test_conformance/vec_step/test_step.cpp b/test_conformance/vec_step/test_step.cpp
new file mode 100644
index 0000000..a2c57c3
--- /dev/null
+++ b/test_conformance/vec_step/test_step.cpp
@@ -0,0 +1,252 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "testBase.h"
+#include "harness/conversions.h"
+#include "harness/typeWrappers.h"
+#include "harness/testHarness.h"
+#include "structs.h"
+#include "defines.h"
+#include "type_replacer.h"
+/*
+ test_step_type,
+ test_step_var,
+ test_step_typedef_type,
+ test_step_typedef_var,
+ */
+// Releases the buffers and CL state owned by test_step_internal and returns
+// the failure code, so that every error path cleans up identically.
+static int failStepTest(bufferStruct * pBuffers, clState * pClState)
+{
+    destroyBufferStruct(pBuffers, pClState);
+    destroyClState(pClState);
+    return -1;
+}
+
+// Builds, runs and verifies one vec_step kernel for every (scalar type,
+// vector size) combination.
+//
+// pattern  - kernel source template containing the .EXTENSIONS., .TYPE. and
+//            .NUM. placeholders.
+// testName - name of the kernel inside pattern; also prefixes log output.
+//
+// double is skipped when cl_khr_fp64 is missing, and long/ulong are skipped on
+// embedded-profile devices without cles_khr_int64.
+//
+// Returns 0 when every tested combination passes and -1 on the first failure.
+int test_step_internal(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char * pattern, const char * testName)
+{
+    int err;
+    int typeIdx, vecSizeIdx;
+    size_t filled;
+    char tempBuffer[2048];
+    char srcBuffer[2048];
+    char profile[1024] = "";
+
+    clState * pClState = newClState(deviceID, context, queue);
+    if(pClState == NULL) {
+        vlog_error("%s : Could not create CL state\n", testName);
+        return -1;
+    }
+
+    bufferStruct * pBuffers =
+        newBufferStruct(BUFFER_SIZE, BUFFER_SIZE, pClState);
+    if(pBuffers == NULL) {
+        destroyClState(pClState);
+        vlog_error("%s : Could not create buffer\n", testName);
+        return -1;
+    }
+
+    //detect whether profile of the device is embedded
+    err = clGetDeviceInfo(deviceID, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
+    if (err)
+    {
+        print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
+        // This path used to return without releasing pBuffers and pClState.
+        return failStepTest(pBuffers, pClState);
+    }
+    gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE");
+
+    for(typeIdx = 0; types[typeIdx] != kNumExplicitTypes; ++typeIdx)
+    {
+        if( types[ typeIdx ] == kDouble )
+        {
+            // If we're testing doubles, we need to check for support first
+            if( !is_extension_available( deviceID, "cl_khr_fp64" ) )
+            {
+                log_info( "Not testing doubles (unsupported on this device)\n" );
+                continue;
+            }
+        }
+
+        if( types[ typeIdx ] == kLong || types[ typeIdx ] == kULong )
+        {
+            // If we're testing long/ulong, we need to check for embedded support
+            if( gIsEmbedded && !is_extension_available( deviceID, "cles_khr_int64") )
+            {
+                log_info( "Not testing longs (unsupported on this embedded device)\n" );
+                continue;
+            }
+        }
+
+        // The replacers report failure through huge (size_t)-N return values,
+        // so any result that does not fit the buffer means the text is unusable.
+        filled = doSingleReplace(tempBuffer, sizeof(tempBuffer), pattern,
+                                 ".EXTENSIONS.", types[typeIdx] == kDouble
+                                     ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
+                                     : "");
+        if(filled >= sizeof(tempBuffer)) {
+            vlog_error("%s: failed to expand extensions for type %s\n",
+                       testName, g_arrTypeNames[typeIdx]);
+            return failStepTest(pBuffers, pClState);
+        }
+
+        for(vecSizeIdx = 0; vecSizeIdx < NUM_VECTOR_SIZES; ++vecSizeIdx)
+        {
+            filled = doReplace(srcBuffer, sizeof(srcBuffer), tempBuffer,
+                               ".TYPE.",  g_arrTypeNames[typeIdx],
+                               ".NUM.", g_arrVecSizeNames[vecSizeIdx]);
+            if(filled >= sizeof(srcBuffer) || srcBuffer[0] == '\0') {
+                vlog_error("%s: failed to fill source buf for type %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                return failStepTest(pBuffers, pClState);
+            }
+
+            err = clStateMakeProgram(pClState, srcBuffer, testName );
+            if (err)
+            {
+                vlog_error("%s: Error compiling \"\n%s\n\"",
+                           testName, srcBuffer);
+                return failStepTest(pBuffers, pClState);
+            }
+
+            err = pushArgs(pBuffers, pClState);
+            if(err != 0)
+            {
+                vlog_error("%s: failed to push args %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                return failStepTest(pBuffers, pClState);
+            }
+
+            // now we run the kernel
+            err = runKernel(pClState, 1024);
+            if(err != 0)
+            {
+                // m_numThreads is a size_t, hence %zu rather than %ld.
+                vlog_error("%s: runKernel fail (%zu threads) %s%s\n",
+                           testName, pClState->m_numThreads,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                return failStepTest(pBuffers, pClState);
+            }
+
+            err = retrieveResults(pBuffers, pClState);
+            if(err != 0)
+            {
+                vlog_error("%s: failed to retrieve results %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                return failStepTest(pBuffers, pClState);
+            }
+
+            err = checkCorrectness(pBuffers, pClState,
+                                   g_arrTypeSizes[typeIdx],
+                                   g_arrVecSizes[vecSizeIdx]);
+            if(err != 0)
+            {
+                vlog_error("%s: incorrect results %s%s\n",
+                           testName,
+                           g_arrTypeNames[typeIdx],
+                           g_arrVecSizeNames[vecSizeIdx]);
+                vlog_error("%s: Source was \"\n%s\n\"",
+                           testName, srcBuffer);
+                return failStepTest(pBuffers, pClState);
+            }
+
+            // Drop this combination's program and kernel before the next
+            // clStateMakeProgram call replaces the handles.
+            // NOTE(review): assumes clStateMakeProgram does not release the
+            // previous program/kernel itself - confirm in structs.cpp.
+            clStateDestroyProgramAndKernel(pClState);
+        }
+    }
+
+    destroyBufferStruct(pBuffers, pClState);
+    destroyClState(pClState);
+
+    return 0;
+}
+const char * patterns[] = { // Kernel source templates; test_step_internal substitutes .EXTENSIONS., .TYPE. and .NUM.
+    ".EXTENSIONS.\n"
+    "__kernel void test_step_type(__global .TYPE..NUM. *source, __global int *dest)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = vec_step(.TYPE..NUM.);\n"
+    "\n"
+    "}\n", // [0] test_step_type: vec_step applied to a type name
+    ".EXTENSIONS.\n"
+    "__kernel void test_step_var(__global .TYPE..NUM. *source, __global int *dest)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = vec_step(source[tid]);\n"
+    "\n"
+    "}\n", // [1] test_step_var: vec_step applied to a value
+    ".EXTENSIONS.\n"
+    " typedef .TYPE..NUM. TypeToTest;\n"
+    "__kernel void test_step_typedef_type(__global TypeToTest *source, __global int *dest)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = vec_step(TypeToTest);\n"
+    "\n"
+    "}\n", // [2] test_step_typedef_type: vec_step applied to a typedef name
+    ".EXTENSIONS.\n"
+    " typedef .TYPE..NUM. TypeToTest;\n"
+    "__kernel void test_step_typedef_var(__global TypeToTest *source, __global int *dest)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dest[tid] = vec_step(source[tid]);\n"
+    "\n"
+    "}\n", // [3] test_step_typedef_var: vec_step applied to a value of the typedef'd type
+/*
+ test_step_type,
+ test_step_var,
+ test_step_typedef_type,
+ test_step_typedef_var,
+ */
+// Harness entry point: checks vec_step() applied to a type name.
+// num_elements is not used. Returns the result of test_step_internal.
+int test_step_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    const char * kernelSource = patterns[0];
+    const char * kernelName = "test_step_type";
+
+    return test_step_internal(deviceID, context, queue, kernelSource, kernelName);
+}
+// Harness entry point: checks vec_step() applied to a value read from the
+// source buffer. num_elements is not used. Returns the result of
+// test_step_internal.
+int test_step_var(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    const char * kernelSource = patterns[1];
+    const char * kernelName = "test_step_var";
+
+    return test_step_internal(deviceID, context, queue, kernelSource, kernelName);
+}
+// Harness entry point: checks vec_step() applied to a typedef name.
+// num_elements is not used. Returns the result of test_step_internal.
+int test_step_typedef_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    const char * kernelSource = patterns[2];
+    const char * kernelName = "test_step_typedef_type";
+
+    return test_step_internal(deviceID, context, queue, kernelSource, kernelName);
+}
+// Harness entry point: checks vec_step() applied to a value whose type is a
+// typedef. num_elements is not used. Returns the result of
+// test_step_internal.
+int test_step_typedef_var(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    const char * kernelSource = patterns[3];
+    const char * kernelName = "test_step_typedef_var";
+
+    return test_step_internal(deviceID, context, queue, kernelSource, kernelName);
+}
diff --git a/test_conformance/vec_step/type_replacer.cpp b/test_conformance/vec_step/type_replacer.cpp
new file mode 100644
index 0000000..74967b2
--- /dev/null
+++ b/test_conformance/vec_step/type_replacer.cpp
@@ -0,0 +1,115 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <string.h>
+#if !defined(_MSC_VER)
+#include <stdint.h>
+#endif // !_MSC_VER
+// Copies source into dest, replacing every occurrence of stringToReplace1
+// with replaceWith1 and of stringToReplace2 with replaceWith2.
+//
+// dest       - output buffer of destLength bytes; always NUL-terminated on
+//              success.
+// destLength - capacity of dest in bytes, including the terminator.
+// source     - NUL-terminated input text.
+//
+// When both tokens match at the same position the second one wins, as in the
+// original implementation. An empty search token never matches.
+//
+// Returns the number of characters written to dest (excluding the
+// terminator). If the output does not fit, dest is set to the empty string
+// (when destLength > 0) and (size_t)-1, (size_t)-2 or (size_t)-3 is returned,
+// depending on whether the overflow happened at token 1, at token 2 or in the
+// trailing text.
+size_t doReplace(char * dest, size_t destLength, const char * source,
+          const char * stringToReplace1,  const char * replaceWith1,
+          const char * stringToReplace2, const char * replaceWith2)
+{
+    const size_t len1 = strlen(stringToReplace1);
+    const size_t len2 = strlen(stringToReplace2);
+    const size_t lenReplace1 = strlen(replaceWith1);
+    const size_t lenReplace2 = strlen(replaceWith2);
+    const char * sourcePtr = source;
+    size_t copyCount = 0; // characters written to dest so far
+
+    if(destLength == 0) { return (size_t)-3; } // no room even for the terminator
+
+    while(*sourcePtr)
+    {
+        // Skip strstr for empty tokens: it would match at every position and
+        // the loop would never advance.
+        const char * ptr1 = (len1 != 0) ? strstr(sourcePtr, stringToReplace1) : NULL;
+        const char * ptr2 = (len2 != 0) ? strstr(sourcePtr, stringToReplace2) : NULL;
+        const size_t room = destLength - 1 - copyCount; // keep one byte for '\0'
+        const char * insertText;
+        size_t nJump, lenToken, lenInsert, errorCode;
+
+        if(ptr1 != NULL && (ptr2 == NULL || ptr2 > ptr1))
+        {
+            nJump = (size_t)(ptr1 - sourcePtr);
+            lenToken = len1;
+            insertText = replaceWith1;
+            lenInsert = lenReplace1;
+            errorCode = (size_t)-1;
+        }
+        else if(ptr2 != NULL)
+        {
+            nJump = (size_t)(ptr2 - sourcePtr);
+            lenToken = len2;
+            insertText = replaceWith2;
+            lenInsert = lenReplace2;
+            errorCode = (size_t)-2;
+        }
+        else
+        {
+            // No token left: copy the remainder of source verbatim.
+            nJump = strlen(sourcePtr);
+            lenToken = 0;
+            insertText = "";
+            lenInsert = 0;
+            errorCode = (size_t)-3;
+        }
+
+        // Both the literal run and the replacement text must fit.
+        if(nJump > room || lenInsert > room - nJump)
+        {
+            dest[0] = '\0'; // callers detect failure via an empty dest
+            return errorCode;
+        }
+
+        memcpy(dest + copyCount, sourcePtr, nJump);
+        memcpy(dest + copyCount + nJump, insertText, lenInsert);
+        copyCount += nJump + lenInsert;
+        sourcePtr += nJump + lenToken;
+    }
+
+    dest[copyCount] = '\0';
+    return copyCount;
+}
+// Copies source into dest, replacing every occurrence of stringToReplace
+// with replaceWith.
+//
+// dest       - output buffer of destLength bytes; always NUL-terminated on
+//              success.
+// destLength - capacity of dest in bytes, including the terminator.
+// source     - NUL-terminated input text.
+//
+// An empty search token never matches.
+//
+// Returns the number of characters written to dest (excluding the
+// terminator). If the output does not fit, dest is set to the empty string
+// (when destLength > 0) and (size_t)-1 is returned for an overflow at a
+// token, or (size_t)-3 for an overflow in the trailing text.
+size_t doSingleReplace(char * dest, size_t destLength, const char * source,
+               const char * stringToReplace, const char * replaceWith)
+{
+    const size_t len = strlen(stringToReplace);
+    const size_t lenReplace = strlen(replaceWith);
+    const char * sourcePtr = source;
+    size_t copyCount = 0; // characters written to dest so far
+
+    if(destLength == 0) { return (size_t)-3; } // no room even for the terminator
+
+    while(*sourcePtr)
+    {
+        // Skip strstr for an empty token: it would match at every position
+        // and the loop would never advance.
+        const char * ptr = (len != 0) ? strstr(sourcePtr, stringToReplace) : NULL;
+        const size_t room = destLength - 1 - copyCount; // keep one byte for '\0'
+        const char * insertText;
+        size_t nJump, lenToken, lenInsert, errorCode;
+
+        if(ptr != NULL)
+        {
+            nJump = (size_t)(ptr - sourcePtr);
+            lenToken = len;
+            insertText = replaceWith;
+            lenInsert = lenReplace;
+            errorCode = (size_t)-1;
+        }
+        else
+        {
+            // No token left: copy the remainder of source verbatim.
+            nJump = strlen(sourcePtr);
+            lenToken = 0;
+            insertText = "";
+            lenInsert = 0;
+            errorCode = (size_t)-3;
+        }
+
+        // Both the literal run and the replacement text must fit.
+        if(nJump > room || lenInsert > room - nJump)
+        {
+            dest[0] = '\0'; // leave a well-formed (empty) string on failure
+            return errorCode;
+        }
+
+        memcpy(dest + copyCount, sourcePtr, nJump);
+        memcpy(dest + copyCount + nJump, insertText, lenInsert);
+        copyCount += nJump + lenInsert;
+        sourcePtr += nJump + lenToken;
+    }
+
+    dest[copyCount] = '\0';
+    return copyCount;
+}
diff --git a/test_conformance/vec_step/type_replacer.h b/test_conformance/vec_step/type_replacer.h
new file mode 100644
index 0000000..f50b08d
--- /dev/null
+++ b/test_conformance/vec_step/type_replacer.h
@@ -0,0 +1,23 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <stdlib.h>
+size_t doReplace(char * dest, size_t destLength, const char * source, // Copies source into dest (capacity destLength),
+          const char * stringToReplace1,  const char * replaceWith1, // substituting replaceWith1 for each stringToReplace1
+          const char * stringToReplace2, const char * replaceWith2); // and replaceWith2 for each stringToReplace2. Returns a copy count, or (size_t)-1/-2/-3 when it detects dest is too small.
+size_t doSingleReplace(char * dest, size_t destLength, const char * source, // Single-token variant of doReplace;
+               const char * stringToReplace, const char * replaceWith); // returns a copy count, or (size_t)-1/-3 when it detects dest is too small.
diff --git a/test_conformance/vectors/CMakeLists.txt b/test_conformance/vectors/CMakeLists.txt
deleted file mode 100644
index 278de20..0000000
--- a/test_conformance/vectors/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-    globals.cpp
-        main.cpp
-        structs.cpp
-        test_step.cpp
-        test_vec_align.cpp
-        type_replacer.cpp
diff --git a/test_conformance/vectors/defines.h b/test_conformance/vectors/defines.h
deleted file mode 100644
index c96c3da..0000000
--- a/test_conformance/vectors/defines.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "harness/errorHelpers.h"
-#include "harness/kernelHelpers.h"
-#include "harness/threadTesting.h"
-#include "harness/typeWrappers.h"
-#include "harness/conversions.h"
-#include "harness/mt19937.h"
-// 1,2,3,4,8,16 or
-// 1,2,4,8,16,3
-extern int g_arrVecSizes[NUM_VECTOR_SIZES];
-extern int g_arrVecSteps[NUM_VECTOR_SIZES];
-extern bool g_wimpyMode;
-extern const char *g_arrVecSizeNames[NUM_VECTOR_SIZES];
-extern size_t g_arrVecAlignMasks[NUM_VECTOR_SIZES];
-// Define the buffer size that we want to block our test with
-#define BUFFER_SIZE (1024 * 1024)
-#define KPAGESIZE 4096
-extern ExplicitType types[];
-extern const char *g_arrTypeNames[];
-extern const size_t g_arrTypeSizes[];
diff --git a/test_conformance/vectors/globals.cpp b/test_conformance/vectors/globals.cpp
deleted file mode 100644
index 6dee6d9..0000000
--- a/test_conformance/vectors/globals.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "defines.h"
-// 1,2,3,4,8,16 or
-// 1,2,4,8,16,3
-int g_arrVecSizes[NUM_VECTOR_SIZES] = { 1, 2, 3, 4, 8, 16 };
-int g_arrVecSteps[NUM_VECTOR_SIZES] = { 1, 2, 4, 4, 8, 16 };
-const char *g_arrVecSizeNames[NUM_VECTOR_SIZES] = {
-    "", "2", "3", "4", "8", "16"
-size_t g_arrVecAlignMasks[NUM_VECTOR_SIZES] = {
-    (size_t)0,
-    (size_t)0x1, // 2
-    (size_t)0x3, // 3
-    (size_t)0x3, // 4
-    (size_t)0x7, // 8
-    (size_t)0xf // 16
-bool g_wimpyMode = false;
-ExplicitType types[] = {
-    kChar,  kUChar, kShort,  kUShort,          kInt, kUInt, kLong,
-    kULong, kFloat, kDouble, kNumExplicitTypes
-const char *g_arrTypeNames[] = { "char", "uchar", "short", "ushort", "int",
-                                 "uint", "long",  "ulong", "float",  "double" };
-extern const size_t g_arrTypeSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
diff --git a/test_conformance/vectors/main.cpp b/test_conformance/vectors/main.cpp
deleted file mode 100644
index e499faf..0000000
--- a/test_conformance/vectors/main.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "harness/compat.h"
-#include <stdio.h>
-#include <string.h>
-#include "procs.h"
-#include "harness/testHarness.h"
-#if !defined(_WIN32)
-#include <unistd.h>
-test_definition test_list[] = {
-    ADD_TEST(step_type),
-    ADD_TEST(step_var),
-    ADD_TEST(step_typedef_type),
-    ADD_TEST(step_typedef_var),
-    ADD_TEST(vec_align_array),
-    ADD_TEST(vec_align_struct),
-    ADD_TEST(vec_align_packed_struct),
-    ADD_TEST(vec_align_struct_arr),
-    ADD_TEST(vec_align_packed_struct_arr),
-const int test_num = ARRAY_SIZE(test_list);
-int main(int argc, const char *argv[])
-    return runTestHarness(argc, argv, test_num, test_list, false, 0);
diff --git a/test_conformance/vectors/procs.h b/test_conformance/vectors/procs.h
deleted file mode 100644
index db423a6..0000000
--- a/test_conformance/vectors/procs.h
+++ /dev/null
@@ -1,55 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "harness/errorHelpers.h"
-#include "harness/kernelHelpers.h"
-#include "harness/threadTesting.h"
-#include "harness/typeWrappers.h"
-#include "harness/conversions.h"
-#include "harness/mt19937.h"
-// The number of errors to print out for each test in the shuffle tests
-extern int create_program_and_kernel(const char *source,
-                                     const char *kernel_name,
-                                     cl_program *program_ret,
-                                     cl_kernel *kernel_ret);
-extern int test_step_type(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, int num_elements);
-extern int test_step_var(cl_device_id deviceID, cl_context context,
-                         cl_command_queue queue, int num_elements);
-extern int test_step_typedef_type(cl_device_id deviceID, cl_context context,
-                                  cl_command_queue queue, int num_elements);
-extern int test_step_typedef_var(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements);
-int test_vec_align_array(cl_device_id deviceID, cl_context context,
-                         cl_command_queue queue, int num_elements);
-int test_vec_align_struct(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, int num_elements);
-int test_vec_align_packed_struct(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements);
-int test_vec_align_struct_arr(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements);
-int test_vec_align_packed_struct_arr(cl_device_id deviceID, cl_context context,
-                                     cl_command_queue queue, int num_elements);
diff --git a/test_conformance/vectors/structs.cpp b/test_conformance/vectors/structs.cpp
deleted file mode 100644
index 9bfa389..0000000
--- a/test_conformance/vectors/structs.cpp
+++ /dev/null
@@ -1,405 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "structs.h"
-#include "defines.h"
-#define DEBUG_MEM_ALLOC 0
-/** typedef struct _bufferStruct
- {
- void * m_pIn;
- void * m_pOut;
- cl_mem m_outBuffer;
- cl_mem m_inBuffer;
- size_t m_bufSize;
- } bufferStruct;
- */
-clState *newClState(cl_device_id device, cl_context context,
-                    cl_command_queue queue)
-    clState *pResult = (clState *)malloc(sizeof(clState));
-    log_info("malloc clState * %x\n", pResult);
-    pResult->m_device = device;
-    pResult->m_context = context;
-    pResult->m_queue = queue;
-    pResult->m_kernel = NULL;
-    pResult->m_program = NULL;
-    return pResult;
-clState *destroyClState(clState *pState)
-    clStateDestroyProgramAndKernel(pState);
-    log_info("delete (free) clState * %x\n", pState);
-    free(pState);
-    return NULL;
-int clStateMakeProgram(clState *pState, const char *prog,
-                       const char *kernelName)
-    const char *srcArr[1] = { NULL };
-    srcArr[0] = prog;
-    int err =
-        create_single_kernel_helper(pState->m_context, &(pState->m_program),
-                                    &(pState->m_kernel), 1, srcArr, kernelName);
-    log_info("create program and kernel\n");
-    return err;
-int runKernel(clState *pState, size_t numThreads)
-    int err;
-    pState->m_numThreads = numThreads;
-    err = clEnqueueNDRangeKernel(pState->m_queue, pState->m_kernel, 1, NULL,
-                                 &(pState->m_numThreads), NULL, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueNDRangeKernel returned %d (%x)\n", err, err);
-        return -1;
-    }
-    return 0;
-void clStateDestroyProgramAndKernel(clState *pState)
-    log_info("destroy program and kernel\n");
-    if (pState->m_kernel != NULL)
-    {
-        clReleaseKernel(pState->m_kernel);
-        pState->m_kernel = NULL;
-    }
-    if (pState->m_program != NULL)
-    {
-        clReleaseProgram(pState->m_program);
-        pState->m_program = NULL;
-    }
-bufferStruct *newBufferStruct(size_t inSize, size_t outSize, clState *pClState)
-    int error;
-    bufferStruct *pResult = (bufferStruct *)malloc(sizeof(bufferStruct));
-    log_info("malloc bufferStruct * %x\n", pResult);
-    pResult->m_bufSizeIn = inSize;
-    pResult->m_bufSizeOut = outSize;
-    pResult->m_pIn = malloc(inSize);
-    pResult->m_pOut = malloc(outSize);
-    log_info("malloc m_pIn %x\n", pResult->m_pIn);
-    log_info("malloc m_pOut %x\n", pResult->m_pOut);
-    pResult->m_inBuffer = clCreateBuffer(pClState->m_context, CL_MEM_READ_ONLY,
-                                         inSize, NULL, &error);
-    if (pResult->m_inBuffer == NULL)
-    {
-        vlog_error("clCreateArray failed for input (%d)\n", error);
-        return destroyBufferStruct(pResult, pClState);
-    }
-    log_info("clCreateBuffer %x\n", pResult->m_inBuffer);
-    pResult->m_outBuffer = clCreateBuffer(
-        pClState->m_context, CL_MEM_WRITE_ONLY, outSize, NULL, &error);
-    if (pResult->m_outBuffer == NULL)
-    {
-        vlog_error("clCreateArray failed for output (%d)\n", error);
-        return destroyBufferStruct(pResult, pClState);
-    }
-    log_info("clCreateBuffer %x\n", pResult->m_outBuffer);
-    pResult->m_bufferUploaded = false;
-    return pResult;
-bufferStruct *destroyBufferStruct(bufferStruct *destroyMe, clState *pClState)
-    if (destroyMe)
-    {
-        if (destroyMe->m_outBuffer != NULL)
-        {
-            log_info("clReleaseMemObject %x\n", destroyMe->m_outBuffer);
-            clReleaseMemObject(destroyMe->m_outBuffer);
-            destroyMe->m_outBuffer = NULL;
-        }
-        if (destroyMe->m_inBuffer != NULL)
-        {
-            log_info("clReleaseMemObject %x\n", destroyMe->m_outBuffer);
-            clReleaseMemObject(destroyMe->m_inBuffer);
-            destroyMe->m_inBuffer = NULL;
-        }
-        if (destroyMe->m_pIn != NULL)
-        {
-            log_info("delete (free) m_pIn %x\n", destroyMe->m_pIn);
-            free(destroyMe->m_pIn);
-            destroyMe->m_pIn = NULL;
-        }
-        if (destroyMe->m_pOut != NULL)
-        {
-            log_info("delete (free) m_pOut %x\n", destroyMe->m_pOut);
-            free(destroyMe->m_pOut);
-            destroyMe->m_pOut = NULL;
-        }
-        log_info("delete (free) bufferStruct * %x\n", destroyMe);
-        free((void *)destroyMe);
-        destroyMe = NULL;
-    }
-    return destroyMe;
-void initContents(bufferStruct *pBufferStruct, clState *pClState,
-                  size_t typeSize, size_t countIn, size_t countOut)
-    size_t i;
-    uint64_t start = 0;
-    switch (typeSize)
-    {
-        case 1: {
-            uint8_t *ub = (uint8_t *)(pBufferStruct->m_pIn);
-            for (i = 0; i < countIn; ++i)
-            {
-                ub[i] = (uint8_t)start++;
-            }
-            break;
-        }
-        case 2: {
-            uint16_t *us = (uint16_t *)(pBufferStruct->m_pIn);
-            for (i = 0; i < countIn; ++i)
-            {
-                us[i] = (uint16_t)start++;
-            }
-            break;
-        }
-        case 4: {
-            if (!g_wimpyMode)
-            {
-                uint32_t *ui = (uint32_t *)(pBufferStruct->m_pIn);
-                for (i = 0; i < countIn; ++i)
-                {
-                    ui[i] = (uint32_t)start++;
-                }
-            }
-            else
-            {
-                // The short test doesn't iterate over the entire 32 bit space
-                // so we alternate between positive and negative values
-                int32_t *ui = (int32_t *)(pBufferStruct->m_pIn);
-                int32_t sign = 1;
-                for (i = 0; i < countIn; ++i, ++start)
-                {
-                    ui[i] = (int32_t)start * sign;
-                    sign = sign * -1;
-                }
-            }
-            break;
-        }
-        case 8: {
-            // We don't iterate over the entire space of 64 bit so for the
-            // selects, we want to test positive and negative values
-            int64_t *ll = (int64_t *)(pBufferStruct->m_pIn);
-            int64_t sign = 1;
-            for (i = 0; i < countIn; ++i, ++start)
-            {
-                ll[i] = start * sign;
-                sign = sign * -1;
-            }
-            break;
-        }
-        default: {
-            log_error("invalid type size %x\n", (int)typeSize);
-        }
-    }
-    // pBufferStruct->m_bufSizeIn
-    // pBufferStruct->m_bufSizeOut
-int pushArgs(bufferStruct *pBufferStruct, clState *pClState)
-    int err;
-    if (!pBufferStruct->m_bufferUploaded)
-    {
-        err = clEnqueueWriteBuffer(pClState->m_queue, pBufferStruct->m_inBuffer,
-                                   CL_TRUE, 0, pBufferStruct->m_bufSizeIn,
-                                   pBufferStruct->m_pIn, 0, NULL, NULL);
-        log_info("clEnqueueWriteBuffer %x\n", pBufferStruct->m_inBuffer);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueWriteBuffer failed\n");
-            return -1;
-        }
-        pBufferStruct->m_bufferUploaded = true;
-    }
-    err = clSetKernelArg(
-        pClState->m_kernel, 0,
-        sizeof(pBufferStruct->m_inBuffer), // pBufferStruct->m_bufSizeIn,
-        &(pBufferStruct->m_inBuffer));
-    // log_info("clSetKernelArg 0, %x\n", pBufferStruct->m_inBuffer);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed, first arg (0)\n");
-        return -1;
-    }
-    err = clSetKernelArg(
-        pClState->m_kernel, 1,
-        sizeof(pBufferStruct->m_outBuffer), // pBufferStruct->m_bufSizeOut,
-        &(pBufferStruct->m_outBuffer));
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed, second arg (1)\n");
-        return -1;
-    }
-    // log_info("clSetKernelArg 0, %x\n", pBufferStruct->m_outBuffer);
-    return 0;
-int retrieveResults(bufferStruct *pBufferStruct, clState *pClState)
-    int err;
-    err = clEnqueueReadBuffer(pClState->m_queue, pBufferStruct->m_outBuffer,
-                              CL_TRUE, 0, pBufferStruct->m_bufSizeOut,
-                              pBufferStruct->m_pOut, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueReadBuffer failed\n");
-        return -1;
-    }
-    return 0;
-// vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames
-// and g_arrVecSizes
-int checkCorrectnessAlign(bufferStruct *pBufferStruct, clState *pClState,
-                          size_t minAlign)
-    size_t i;
-    cl_uint *targetArr = (cl_uint *)(pBufferStruct->m_pOut);
-    for (i = 0; i < pClState->m_numThreads; ++i)
-    {
-        if ((targetArr[i]) % minAlign != (cl_uint)0)
-        {
-            vlog_error("Error %d (of %d).  Expected a multple of %x, got %x\n",
-                       i, pClState->m_numThreads, minAlign, targetArr[i]);
-            return -1;
-        }
-    }
-    /*    log_info("\n");
-     for(i = 0; i < 4; ++i) {
-     log_info("%lx, ", targetArr[i]);
-     }
-     log_info("\n");
-     fflush(stdout); */
-    return 0;
-int checkCorrectnessStep(bufferStruct *pBufferStruct, clState *pClState,
-                         size_t typeSize, size_t vecWidth)
-    size_t i;
-    cl_int targetSize = (cl_int)vecWidth;
-    cl_int *targetArr = (cl_int *)(pBufferStruct->m_pOut);
-    if (targetSize == 3)
-    {
-        targetSize = 4; // hack for 4-aligned vec3 types
-    }
-    for (i = 0; i < pClState->m_numThreads; ++i)
-    {
-        if (targetArr[i] != targetSize)
-        {
-            vlog_error("Error %ld (of %ld).  Expected %d, got %d\n", i,
-                       pClState->m_numThreads, targetSize, targetArr[i]);
-            return -1;
-        }
-    }
-    return 0;
-// vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames
-// and g_arrVecSizes
-int checkPackedCorrectness(bufferStruct *pBufferStruct, clState *pClState,
-                           size_t totSize, size_t beforeSize)
-    size_t i;
-    cl_uint *targetArr = (cl_uint *)(pBufferStruct->m_pOut);
-    for (i = 0; i < pClState->m_numThreads; ++i)
-    {
-        if ((targetArr[i] - beforeSize) % totSize != (cl_uint)0)
-        {
-            vlog_error("Error %d (of %d).  Expected %d more than a multple of "
-                       "%d, got %d \n",
-                       i, pClState->m_numThreads, beforeSize, totSize,
-                       targetArr[i]);
-            return -1;
-        }
-    }
-    /*    log_info("\n");
-     for(i = 0; i < 4; ++i) {
-     log_info("%lx, ", targetArr[i]);
-     }
-     log_info("\n");
-     fflush(stdout); */
-    return 0;
diff --git a/test_conformance/vectors/structs.h b/test_conformance/vectors/structs.h
deleted file mode 100644
index c6a1725..0000000
--- a/test_conformance/vectors/structs.h
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "testBase.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-typedef struct _clState
-    cl_device_id m_device;
-    cl_context m_context;
-    cl_command_queue m_queue;
-    cl_program m_program;
-    cl_kernel m_kernel;
-    size_t m_numThreads;
-} clState;
-clState* newClState(cl_device_id device, cl_context context,
-                    cl_command_queue queue);
-clState* destroyClState(clState* pState);
-int clStateMakeProgram(clState* pState, const char* prog,
-                       const char* kernelName);
-void clStateDestroyProgramAndKernel(clState* pState);
-int runKernel(clState* pState, size_t numThreads);
-typedef struct _bufferStruct
-    void* m_pIn;
-    void* m_pOut;
-    cl_mem m_outBuffer;
-    cl_mem m_inBuffer;
-    size_t m_bufSizeIn, m_bufSizeOut;
-    int m_bufferUploaded;
-} bufferStruct;
-bufferStruct* newBufferStruct(size_t inSize, size_t outSize, clState* pClState);
-bufferStruct* destroyBufferStruct(bufferStruct* destroyMe, clState* pClState);
-void initContents(bufferStruct* pBufferStruct, clState* pClState,
-                  size_t typeSize, size_t vecWidth);
-int pushArgs(bufferStruct* pBufferStruct, clState* pClState);
-int retrieveResults(bufferStruct* pBufferStruct, clState* pClState);
-int checkCorrectnessStep(bufferStruct* pBufferStruct, clState* pClState,
-                         size_t typeSize, size_t vecWidth);
-// vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames
-// and g_arrVecSizes
-int checkCorrectnessAlign(bufferStruct* pBufferStruct, clState* pClState,
-                          size_t minAlign);
-int checkPackedCorrectness(bufferStruct* pBufferStruct, clState* pClState,
-                           size_t totSize, size_t beforeSize);
diff --git a/test_conformance/vectors/testBase.h b/test_conformance/vectors/testBase.h
deleted file mode 100644
index 63086d7..0000000
--- a/test_conformance/vectors/testBase.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef _testBase_h
-#define _testBase_h
-#include "harness/compat.h"
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include "procs.h"
-#endif // _testBase_h
diff --git a/test_conformance/vectors/test_step.cpp b/test_conformance/vectors/test_step.cpp
deleted file mode 100644
index 2f6ad18..0000000
--- a/test_conformance/vectors/test_step.cpp
+++ /dev/null
@@ -1,260 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "testBase.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-#include "harness/testHarness.h"
-#include "structs.h"
-#include "defines.h"
-#include "type_replacer.h"
- test_step_type,
- test_step_var,
- test_step_typedef_type,
- test_step_typedef_var,
- */
-int test_step_internal(cl_device_id deviceID, cl_context context,
-                       cl_command_queue queue, const char* pattern,
-                       const char* testName)
-    int err;
-    int typeIdx, vecSizeIdx;
-    char tempBuffer[2048];
-    clState* pClState = newClState(deviceID, context, queue);
-    bufferStruct* pBuffers =
-        newBufferStruct(BUFFER_SIZE, BUFFER_SIZE, pClState);
-    if (pBuffers == NULL)
-    {
-        destroyClState(pClState);
-        vlog_error("%s : Could not create buffer\n", testName);
-        return -1;
-    }
-    // detect whether profile of the device is embedded
-    char profile[1024] = "";
-    err = clGetDeviceInfo(deviceID, CL_DEVICE_PROFILE, sizeof(profile), profile,
-                          NULL);
-    if (err)
-    {
-        print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n");
-        return -1;
-    }
-    gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE");
-    for (typeIdx = 0; types[typeIdx] != kNumExplicitTypes; ++typeIdx)
-    {
-        if (types[typeIdx] == kDouble)
-        {
-            // If we're testing doubles, we need to check for support first
-            if (!is_extension_available(deviceID, "cl_khr_fp64"))
-            {
-                log_info("Not testing doubles (unsupported on this device)\n");
-                continue;
-            }
-        }
-        if (types[typeIdx] == kLong || types[typeIdx] == kULong)
-        {
-            // If we're testing long/ulong, we need to check for embedded
-            // support
-            if (gIsEmbedded
-                && !is_extension_available(deviceID, "cles_khr_int64"))
-            {
-                log_info("Not testing longs (unsupported on this embedded "
-                         "device)\n");
-                continue;
-            }
-        }
-        char srcBuffer[2048];
-        doSingleReplace(tempBuffer, 2048, pattern, ".EXTENSIONS.",
-                        types[typeIdx] == kDouble
-                            ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
-                            : "");
-        for (vecSizeIdx = 0; vecSizeIdx < NUM_VECTOR_SIZES; ++vecSizeIdx)
-        {
-            doReplace(srcBuffer, 2048, tempBuffer, ".TYPE.",
-                      g_arrTypeNames[typeIdx], ".NUM.",
-                      g_arrVecSizeNames[vecSizeIdx]);
-            if (srcBuffer[0] == '\0')
-            {
-                vlog_error("%s: failed to fill source buf for type %s%s\n",
-                           testName, g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-            err = clStateMakeProgram(pClState, srcBuffer, testName);
-            if (err)
-            {
-                vlog_error("%s: Error compiling \"\n%s\n\"", testName,
-                           srcBuffer);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-            err = pushArgs(pBuffers, pClState);
-            if (err != 0)
-            {
-                vlog_error("%s: failed to push args %s%s\n", testName,
-                           g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-            // now we run the kernel
-            err = runKernel(pClState, 1024);
-            if (err != 0)
-            {
-                vlog_error("%s: runKernel fail (%ld threads) %s%s\n", testName,
-                           pClState->m_numThreads, g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-            err = retrieveResults(pBuffers, pClState);
-            if (err != 0)
-            {
-                vlog_error("%s: failed to retrieve results %s%s\n", testName,
-                           g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-            err = checkCorrectnessStep(pBuffers, pClState,
-                                       g_arrTypeSizes[typeIdx],
-                                       g_arrVecSizes[vecSizeIdx]);
-            if (err != 0)
-            {
-                vlog_error("%s: incorrect results %s%s\n", testName,
-                           g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                vlog_error("%s: Source was \"\n%s\n\"", testName, srcBuffer);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-        }
-    }
-    destroyBufferStruct(pBuffers, pClState);
-    destroyClState(pClState);
-    // vlog_error("%s : implementation incomplete : FAIL\n", testName);
-    return 0; // -1; // fails on account of not being written.
-static const char* patterns[] = {
-    ".EXTENSIONS.\n"
-    "__kernel void test_step_type(__global .TYPE..NUM. *source, __global int "
-    "*dest)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = vec_step(.TYPE..NUM.);\n"
-    "\n"
-    "}\n",
-    ".EXTENSIONS.\n"
-    "__kernel void test_step_var(__global .TYPE..NUM. *source, __global int "
-    "*dest)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = vec_step(source[tid]);\n"
-    "\n"
-    "}\n",
-    ".EXTENSIONS.\n"
-    " typedef .TYPE..NUM. TypeToTest;\n"
-    "__kernel void test_step_typedef_type(__global TypeToTest *source, "
-    "__global int *dest)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = vec_step(TypeToTest);\n"
-    "\n"
-    "}\n",
-    ".EXTENSIONS.\n"
-    " typedef .TYPE..NUM. TypeToTest;\n"
-    "__kernel void test_step_typedef_var(__global TypeToTest *source, __global "
-    "int *dest)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = vec_step(source[tid]);\n"
-    "\n"
-    "}\n",
- test_step_type,
- test_step_var,
- test_step_typedef_type,
- test_step_typedef_var,
- */
-int test_step_type(cl_device_id deviceID, cl_context context,
-                   cl_command_queue queue, int num_elements)
-    return test_step_internal(deviceID, context, queue, patterns[0],
-                              "test_step_type");
-int test_step_var(cl_device_id deviceID, cl_context context,
-                  cl_command_queue queue, int num_elements)
-    return test_step_internal(deviceID, context, queue, patterns[1],
-                              "test_step_var");
-int test_step_typedef_type(cl_device_id deviceID, cl_context context,
-                           cl_command_queue queue, int num_elements)
-    return test_step_internal(deviceID, context, queue, patterns[2],
-                              "test_step_typedef_type");
-int test_step_typedef_var(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, int num_elements)
-    return test_step_internal(deviceID, context, queue, patterns[3],
-                              "test_step_typedef_var");
diff --git a/test_conformance/vectors/test_vec_align.cpp b/test_conformance/vectors/test_vec_align.cpp
deleted file mode 100644
index 2f392f5..0000000
--- a/test_conformance/vectors/test_vec_align.cpp
+++ /dev/null
@@ -1,540 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "testBase.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-#include "harness/testHarness.h"
-#include "structs.h"
-#include "defines.h"
-#include "type_replacer.h"
-size_t get_align(size_t vecSize)
-    if (vecSize == 3)
-    {
-        return 4;
-    }
-    return vecSize;
-/* // Lots of conditionals means this is not gonna be an optimal min on intel.
- */
-/* // That's okay, make sure we only call a few times per test, not for every */
-/* // element */
-/* size_t min_of_nonzero(size_t a, size_t b) */
-/* { */
-/*     if(a != 0 && (a<=b || b==0)) */
-/*     { */
-/*     return a; */
-/*     } */
-/*     if(b != 0 && (b<a || a==0)) */
-/*     { */
-/*     return b; */
-/*     } */
-/*     return 0; */
-/* } */
-/* size_t get_min_packed_alignment(size_t preSize, size_t typeMultiplePreSize,
- */
-/*                 size_t postSize, size_t typeMultiplePostSize, */
-/*                 ExplicitType kType, size_t vecSize) */
-/* { */
-/*     size_t pre_min = min_of_nonzero(preSize,  */
-/*                     typeMultiplePreSize* */
-/*                     get_explicit_type_size(kType)); */
-/*     size_t post_min = min_of_nonzero(postSize,  */
-/*                     typeMultiplePostSize* */
-/*                     get_explicit_type_size(kType)); */
-/*     size_t struct_min = min_of_nonzero(pre_min, post_min); */
-/*     size_t result =  min_of_nonzero(struct_min, get_align(vecSize) */
-/*                     *get_explicit_type_size(kType)); */
-/*     return result; */
-/* } */
-int test_vec_internal(cl_device_id deviceID, cl_context context,
-                      cl_command_queue queue, const char* pattern,
-                      const char* testName, size_t bufSize, size_t preSize,
-                      size_t typeMultiplePreSize, size_t postSize,
-                      size_t typeMultiplePostSize)
-    int err;
-    int typeIdx, vecSizeIdx;
-    char tmpBuffer[2048];
-    char srcBuffer[2048];
-    size_t preSizeBytes, postSizeBytes, typeSize, totSize;
-    clState* pClState = newClState(deviceID, context, queue);
-    bufferStruct* pBuffers = newBufferStruct(
-        bufSize, bufSize * sizeof(cl_uint) / sizeof(cl_char), pClState);
-    if (pBuffers == NULL)
-    {
-        destroyClState(pClState);
-        vlog_error("%s : Could not create buffer\n", testName);
-        return -1;
-    }
-    for (typeIdx = 0; types[typeIdx] != kNumExplicitTypes; ++typeIdx)
-    {
-        // Skip doubles if it is not supported otherwise enable pragma
-        if (types[typeIdx] == kDouble)
-        {
-            if (!is_extension_available(deviceID, "cl_khr_fp64"))
-            {
-                continue;
-            }
-            else
-            {
-                doReplace(tmpBuffer, 2048, pattern, ".PRAGMA.",
-                          "#pragma OPENCL EXTENSION cl_khr_fp64: ", ".STATE.",
-                          "enable");
-            }
-        }
-        else
-        {
-            if (types[typeIdx] == kLong || types[typeIdx] == kULong)
-            {
-                if (gIsEmbedded) continue;
-            }
-            doReplace(tmpBuffer, 2048, pattern, ".PRAGMA.", " ", ".STATE.",
-                      " ");
-        }
-        typeSize = get_explicit_type_size(types[typeIdx]);
-        preSizeBytes = preSize + typeSize * typeMultiplePreSize;
-        postSizeBytes = postSize + typeSize * typeMultiplePostSize;
-        for (vecSizeIdx = 1; vecSizeIdx < NUM_VECTOR_SIZES; ++vecSizeIdx)
-        {
-            totSize = preSizeBytes + postSizeBytes
-                + typeSize * get_align(g_arrVecSizes[vecSizeIdx]);
-            doReplace(srcBuffer, 2048, tmpBuffer, ".TYPE.",
-                      g_arrTypeNames[typeIdx], ".NUM.",
-                      g_arrVecSizeNames[vecSizeIdx]);
-            if (srcBuffer[0] == '\0')
-            {
-                vlog_error("%s: failed to fill source buf for type %s%s\n",
-                           testName, g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-            // log_info("Buffer is \"\n%s\n\"\n", srcBuffer);
-            // fflush(stdout);
-            err = clStateMakeProgram(pClState, srcBuffer, testName);
-            if (err)
-            {
-                vlog_error("%s: Error compiling \"\n%s\n\"", testName,
-                           srcBuffer);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-            err = pushArgs(pBuffers, pClState);
-            if (err != 0)
-            {
-                vlog_error("%s: failed to push args %s%s\n", testName,
-                           g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-            // log_info("About to Run kernel\n"); fflush(stdout);
-            // now we run the kernel
-            err = runKernel(
-                pClState,
-                bufSize
-                    / (g_arrVecSizes[vecSizeIdx] * g_arrTypeSizes[typeIdx]));
-            if (err != 0)
-            {
-                vlog_error("%s: runKernel fail (%ld threads) %s%s\n", testName,
-                           pClState->m_numThreads, g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-            // log_info("About to retrieve results\n"); fflush(stdout);
-            err = retrieveResults(pBuffers, pClState);
-            if (err != 0)
-            {
-                vlog_error("%s: failed to retrieve results %s%s\n", testName,
-                           g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-            if (preSizeBytes + postSizeBytes == 0)
-            {
-                // log_info("About to Check Correctness\n"); fflush(stdout);
-                err = checkCorrectnessAlign(pBuffers, pClState,
-                                            get_align(g_arrVecSizes[vecSizeIdx])
-                                                * typeSize);
-            }
-            else
-            {
-                // we're checking for an aligned struct
-                err = checkPackedCorrectness(pBuffers, pClState, totSize,
-                                             preSizeBytes);
-            }
-            if (err != 0)
-            {
-                vlog_error("%s: incorrect results %s%s\n", testName,
-                           g_arrTypeNames[typeIdx],
-                           g_arrVecSizeNames[vecSizeIdx]);
-                vlog_error("%s: Source was \"\n%s\n\"", testName, srcBuffer);
-                destroyBufferStruct(pBuffers, pClState);
-                destroyClState(pClState);
-                return -1;
-            }
-            clStateDestroyProgramAndKernel(pClState);
-        }
-    }
-    destroyBufferStruct(pBuffers, pClState);
-    destroyClState(pClState);
-    // vlog_error("%s : implementation incomplete : FAIL\n", testName);
-    return 0; // -1; // fails on account of not being written.
-static const char* patterns[] = {
-    ".PRAGMA..STATE.\n"
-    "__kernel void test_vec_align_array(.SRC_SCOPE. .TYPE..NUM. *source, "
-    ".DST_SCOPE. uint *dest)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)(source+tid));\n"
-    "}\n",
-    ".PRAGMA..STATE.\n"
-    "typedef struct myUnpackedStruct { \n"
-    ".PRE."
-    "    .TYPE..NUM. vec;\n"
-    ".POST."
-    "} testStruct;\n"
-    "__kernel void test_vec_align_struct(__constant .TYPE..NUM. *source, "
-    ".DST_SCOPE. uint *dest)\n"
-    "{\n"
-    "    .SRC_SCOPE. testStruct test;\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec));\n"
-    "}\n",
-    ".PRAGMA..STATE.\n"
-    "typedef struct __attribute__ ((packed)) myPackedStruct { \n"
-    ".PRE."
-    "    .TYPE..NUM. vec;\n"
-    ".POST."
-    "} testStruct;\n"
-    "__kernel void test_vec_align_packed_struct(__constant .TYPE..NUM. "
-    "*source, .DST_SCOPE. uint *dest)\n"
-    "{\n"
-    "    .SRC_SCOPE. testStruct test;\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec) - (.SRC_SCOPE. "
-    "uchar *)&test);\n"
-    "}\n",
-    ".PRAGMA..STATE.\n"
-    "typedef struct myStruct { \n"
-    ".PRE."
-    "    .TYPE..NUM. vec;\n"
-    ".POST."
-    "} testStruct;\n"
-    "__kernel void test_vec_align_struct_arr(.SRC_SCOPE. testStruct *source, "
-    ".DST_SCOPE. uint *dest)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec));\n"
-    "}\n",
-    ".PRAGMA..STATE.\n"
-    "typedef struct __attribute__ ((packed)) myPackedStruct { \n"
-    ".PRE."
-    "    .TYPE..NUM. vec;\n"
-    ".POST."
-    "} testStruct;\n"
-    "__kernel void test_vec_align_packed_struct_arr(.SRC_SCOPE.  testStruct "
-    "*source, .DST_SCOPE. uint *dest)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec) - "
-    "(.SRC_SCOPE. uchar *)&(source[0]));\n"
-    "}\n",
-    // __attribute__ ((packed))
-const char* pre_substitution_arr[] = { "",
-                                       "char c;\n",
-                                       "short3 s;",
-                                       ".TYPE.3 tPre;\n",
-                                       ".TYPE. arrPre[5];\n",
-                                       ".TYPE. arrPre[12];\n",
-                                       NULL };
-// alignments of everything in pre_substitution_arr as raw alignments
-// 0 if such a thing is meaningless
-size_t pre_align_arr[] = { 0,
-                           sizeof(cl_char),
-                           4 * sizeof(cl_short),
-                           0, // taken care of in type_multiple_pre_align_arr
-                           0,
-                           0 };
-// alignments of everything in pre_substitution_arr as multiples of
-// sizeof(.TYPE.)
-// 0 if such a thing is meaningless
-size_t type_multiple_pre_align_arr[] = { 0, 0, 0, 4, 5, 12 };
-const char* post_substitution_arr[] = { "",
-                                        "char cPost;\n",
-                                        ".TYPE. arrPost[3];\n",
-                                        ".TYPE. arrPost[5];\n",
-                                        ".TYPE.3 arrPost;\n",
-                                        ".TYPE. arrPost[12];\n",
-                                        NULL };
-// alignments of everything in post_substitution_arr as raw alignments
-// 0 if such a thing is meaningless
-size_t post_align_arr[] = { 0, sizeof(cl_char),
-                            0, // taken care of in type_multiple_post_align_arr
-                            0, 0,
-                            0 };
-// alignments of everything in post_substitution_arr as multiples of
-// sizeof(.TYPE.)
-// 0 if such a thing is meaningless
-size_t type_multiple_post_align_arr[] = { 0, 0, 3, 5, 4, 12 };
-// there hsould be a packed version of this?
-int test_vec_align_array(cl_device_id deviceID, cl_context context,
-                         cl_command_queue queue, int num_elements)
-    char tmp[2048];
-    int result;
-    log_info("Testing global\n");
-    doReplace(tmp, (size_t)2048, patterns[0], ".SRC_SCOPE.", "__global",
-              ".DST_SCOPE.", "__global"); //
-    result = test_vec_internal(deviceID, context, queue, tmp,
-                               "test_vec_align_array", BUFFER_SIZE, 0, 0, 0, 0);
-    return result;
-int test_vec_align_struct(cl_device_id deviceID, cl_context context,
-                          cl_command_queue queue, int num_elements)
-    char tmp1[2048], tmp2[2048];
-    int result = 0;
-    int preIdx, postIdx;
-    log_info("testing __private\n");
-    doReplace(tmp2, (size_t)2048, patterns[1], ".SRC_SCOPE.", "__private",
-              ".DST_SCOPE.", "__global"); //
-    for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
-    {
-        for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
-        {
-            doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
-                      pre_substitution_arr[preIdx], ".POST.",
-                      post_substitution_arr[postIdx]);
-            result =
-                test_vec_internal(deviceID, context, queue, tmp1,
-                                  "test_vec_align_struct", 512, 0, 0, 0, 0);
-            if (result != 0)
-            {
-                return result;
-            }
-        }
-    }
-    log_info("testing __local\n");
-    doReplace(tmp2, (size_t)2048, patterns[1], ".SRC_SCOPE.", "__local",
-              ".DST_SCOPE.", "__global"); //
-    for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
-    {
-        for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
-        {
-            doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
-                      pre_substitution_arr[preIdx], ".POST.",
-                      post_substitution_arr[postIdx]);
-            result =
-                test_vec_internal(deviceID, context, queue, tmp1,
-                                  "test_vec_align_struct", 512, 0, 0, 0, 0);
-            if (result != 0)
-            {
-                return result;
-            }
-        }
-    }
-    return 0;
-int test_vec_align_packed_struct(cl_device_id deviceID, cl_context context,
-                                 cl_command_queue queue, int num_elements)
-    char tmp1[2048], tmp2[2048];
-    int result = 0;
-    int preIdx, postIdx;
-    log_info("Testing __private\n");
-    doReplace(tmp2, (size_t)2048, patterns[2], ".SRC_SCOPE.", "__private",
-              ".DST_SCOPE.", "__global"); //
-    for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
-    {
-        for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
-        {
-            doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
-                      pre_substitution_arr[preIdx], ".POST.",
-                      post_substitution_arr[postIdx]);
-            result = test_vec_internal(
-                deviceID, context, queue, tmp1, "test_vec_align_packed_struct",
-                512, pre_align_arr[preIdx], type_multiple_pre_align_arr[preIdx],
-                post_align_arr[postIdx], type_multiple_post_align_arr[postIdx]);
-            if (result != 0)
-            {
-                return result;
-            }
-        }
-    }
-    log_info("testing __local\n");
-    doReplace(tmp2, (size_t)2048, patterns[2], ".SRC_SCOPE.", "__local",
-              ".DST_SCOPE.", "__global"); //
-    for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
-    {
-        for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
-        {
-            doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
-                      pre_substitution_arr[preIdx], ".POST.",
-                      post_substitution_arr[postIdx]);
-            result = test_vec_internal(
-                deviceID, context, queue, tmp1, "test_vec_align_packed_struct",
-                512, pre_align_arr[preIdx], type_multiple_pre_align_arr[preIdx],
-                post_align_arr[postIdx], type_multiple_post_align_arr[postIdx]);
-            if (result != 0)
-            {
-                return result;
-            }
-        }
-    }
-    return 0;
-int test_vec_align_struct_arr(cl_device_id deviceID, cl_context context,
-                              cl_command_queue queue, int num_elements)
-    char tmp1[2048], tmp2[2048];
-    int result = 0;
-    int preIdx, postIdx;
-    log_info("testing __global\n");
-    doReplace(tmp2, (size_t)2048, patterns[3], ".SRC_SCOPE.", "__global",
-              ".DST_SCOPE.", "__global"); //
-    for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
-    {
-        for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
-        {
-            doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
-                      pre_substitution_arr[preIdx], ".POST.",
-                      post_substitution_arr[postIdx]);
-            result = test_vec_internal(deviceID, context, queue, tmp1,
-                                       "test_vec_align_struct_arr", BUFFER_SIZE,
-                                       0, 0, 0, 0);
-            if (result != 0)
-            {
-                return result;
-            }
-        }
-    }
-    return 0;
-int test_vec_align_packed_struct_arr(cl_device_id deviceID, cl_context context,
-                                     cl_command_queue queue, int num_elements)
-    char tmp1[2048], tmp2[2048];
-    int result = 0;
-    int preIdx, postIdx;
-    log_info("Testing __global\n");
-    doReplace(tmp2, (size_t)2048, patterns[4], ".SRC_SCOPE.", "__global",
-              ".DST_SCOPE.", "__global"); //
-    for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
-    {
-        for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
-        {
-            doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
-                      pre_substitution_arr[preIdx], ".POST.",
-                      post_substitution_arr[postIdx]);
-            result = test_vec_internal(
-                deviceID, context, queue, tmp1,
-                "test_vec_align_packed_struct_arr", BUFFER_SIZE,
-                pre_align_arr[preIdx], type_multiple_pre_align_arr[preIdx],
-                post_align_arr[postIdx], type_multiple_post_align_arr[postIdx]);
-            if (result != 0) return result;
-        }
-    }
-    return 0;
diff --git a/test_conformance/vectors/type_replacer.cpp b/test_conformance/vectors/type_replacer.cpp
deleted file mode 100644
index 39c6194..0000000
--- a/test_conformance/vectors/type_replacer.cpp
+++ /dev/null
@@ -1,132 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include <string.h>
-#if !defined(_MSC_VER)
-#include <stdint.h>
-#endif // !_MSC_VER
-size_t doReplace(char* dest, size_t destLength, const char* source,
-                 const char* stringToReplace1, const char* replaceWith1,
-                 const char* stringToReplace2, const char* replaceWith2)
-    size_t copyCount = 0;
-    const char* sourcePtr = source;
-    char* destPtr = dest;
-    const char* ptr1;
-    const char* ptr2;
-    size_t nJump;
-    size_t len1, len2;
-    size_t lenReplace1, lenReplace2;
-    len1 = strlen(stringToReplace1);
-    len2 = strlen(stringToReplace2);
-    lenReplace1 = strlen(replaceWith1);
-    lenReplace2 = strlen(replaceWith2);
-    for (; copyCount < destLength && *sourcePtr;)
-    {
-        ptr1 = strstr(sourcePtr, stringToReplace1);
-        ptr2 = strstr(sourcePtr, stringToReplace2);
-        if (ptr1 != NULL && (ptr2 == NULL || ptr2 > ptr1))
-        {
-            nJump = ptr1 - sourcePtr;
-            if (((uintptr_t)ptr1 - (uintptr_t)sourcePtr)
-                > destLength - copyCount)
-            {
-                return -1;
-            }
-            copyCount += nJump;
-            strncpy(destPtr, sourcePtr, nJump);
-            destPtr += nJump;
-            sourcePtr += nJump + len1;
-            strcpy(destPtr, replaceWith1);
-            destPtr += lenReplace1;
-        }
-        else if (ptr2 != NULL && (ptr1 == NULL || ptr1 >= ptr2))
-        {
-            nJump = ptr2 - sourcePtr;
-            if (nJump > destLength - copyCount)
-            {
-                return -2;
-            }
-            copyCount += nJump;
-            strncpy(destPtr, sourcePtr, nJump);
-            destPtr += nJump;
-            sourcePtr += nJump + len2;
-            strcpy(destPtr, replaceWith2);
-            destPtr += lenReplace2;
-        }
-        else
-        {
-            nJump = strlen(sourcePtr);
-            if (nJump > destLength - copyCount)
-            {
-                return -3;
-            }
-            copyCount += nJump;
-            strcpy(destPtr, sourcePtr);
-            destPtr += nJump;
-            sourcePtr += nJump;
-        }
-    }
-    *destPtr = '\0';
-    return copyCount;
-size_t doSingleReplace(char* dest, size_t destLength, const char* source,
-                       const char* stringToReplace, const char* replaceWith)
-    size_t copyCount = 0;
-    const char* sourcePtr = source;
-    char* destPtr = dest;
-    const char* ptr;
-    size_t nJump;
-    size_t len;
-    size_t lenReplace;
-    len = strlen(stringToReplace);
-    lenReplace = strlen(replaceWith);
-    for (; copyCount < destLength && *sourcePtr;)
-    {
-        ptr = strstr(sourcePtr, stringToReplace);
-        if (ptr != NULL)
-        {
-            nJump = ptr - sourcePtr;
-            if (((uintptr_t)ptr - (uintptr_t)sourcePtr)
-                > destLength - copyCount)
-            {
-                return -1;
-            }
-            copyCount += nJump;
-            strncpy(destPtr, sourcePtr, nJump);
-            destPtr += nJump;
-            sourcePtr += nJump + len;
-            strcpy(destPtr, replaceWith);
-            destPtr += lenReplace;
-        }
-        else
-        {
-            nJump = strlen(sourcePtr);
-            if (nJump > destLength - copyCount)
-            {
-                return -3;
-            }
-            copyCount += nJump;
-            strcpy(destPtr, sourcePtr);
-            destPtr += nJump;
-            sourcePtr += nJump;
-        }
-    }
-    *destPtr = '\0';
-    return copyCount;
diff --git a/test_conformance/vectors/type_replacer.h b/test_conformance/vectors/type_replacer.h
deleted file mode 100644
index d7eb7ef..0000000
--- a/test_conformance/vectors/type_replacer.h
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright (c) 2017 The Khronos Group Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include <stdlib.h>
-size_t doReplace(char* dest, size_t destLength, const char* source,
-                 const char* stringToReplace1, const char* replaceWith1,
-                 const char* stringToReplace2, const char* replaceWith2);
-size_t doSingleReplace(char* dest, size_t destLength, const char* source,
-                       const char* stringToReplace, const char* replaceWith);
diff --git a/test_conformance/workgroups/main.cpp b/test_conformance/workgroups/main.cpp
index 41ffa74..1b47420 100644
--- a/test_conformance/workgroups/main.cpp
+++ b/test_conformance/workgroups/main.cpp
@@ -47,9 +47,7 @@
     auto expected_min_version = Version(2, 0);
     if (version < expected_min_version)
-        version_expected_info("Test", "OpenCL",
-                              expected_min_version.to_string().c_str(),
-                              version.to_string().c_str());
+        version_expected_info("Test", expected_min_version.to_string().c_str(), version.to_string().c_str());
         return TEST_SKIP;
diff --git a/test_conformance/workgroups/test_wg_all.cpp b/test_conformance/workgroups/test_wg_all.cpp
index ccf17b6..33ebe99 100644
--- a/test_conformance/workgroups/test_wg_all.cpp
+++ b/test_conformance/workgroups/test_wg_all.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -79,8 +79,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_all_kernel_code, "test_wg_all");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_all_kernel_code, "test_wg_all", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -92,17 +91,14 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * (num_elements+1));
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * (num_elements+1));
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_float) * (num_elements + 1), NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * (num_elements+1), NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/workgroups/test_wg_any.cpp b/test_conformance/workgroups/test_wg_any.cpp
index 4785ad5..cd1ebff 100644
--- a/test_conformance/workgroups/test_wg_any.cpp
+++ b/test_conformance/workgroups/test_wg_any.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -79,8 +79,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_any_kernel_code, "test_wg_any");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_any_kernel_code, "test_wg_any", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -92,17 +91,14 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * (num_elements+1));
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * (num_elements+1));
-    streams[0] =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       sizeof(cl_float) * (num_elements + 1), NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * (num_elements+1), NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/workgroups/test_wg_broadcast.cpp b/test_conformance/workgroups/test_wg_broadcast.cpp
index 3555947..df4263b 100644
--- a/test_conformance/workgroups/test_wg_broadcast.cpp
+++ b/test_conformance/workgroups/test_wg_broadcast.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -174,9 +174,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_broadcast_1D_kernel_code,
-                                      "test_wg_broadcast_1D");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_broadcast_1D_kernel_code, "test_wg_broadcast_1D", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -188,16 +186,14 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -283,9 +279,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_broadcast_2D_kernel_code,
-                                      "test_wg_broadcast_2D");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_broadcast_2D_kernel_code, "test_wg_broadcast_2D", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -317,16 +311,14 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -410,9 +402,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_broadcast_3D_kernel_code,
-                                      "test_wg_broadcast_3D");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_broadcast_3D_kernel_code, "test_wg_broadcast_3D", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -445,16 +435,14 @@
     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/workgroups/test_wg_reduce.cpp b/test_conformance/workgroups/test_wg_reduce.cpp
index eb26f49..92a5165 100644
--- a/test_conformance/workgroups/test_wg_reduce.cpp
+++ b/test_conformance/workgroups/test_wg_reduce.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -176,9 +176,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_add_kernel_code_int,
-                                      "test_wg_reduce_add_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_add_kernel_code_int, "test_wg_reduce_add_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -190,16 +188,14 @@
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -281,9 +277,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_add_kernel_code_uint,
-                                      "test_wg_reduce_add_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_add_kernel_code_uint, "test_wg_reduce_add_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -295,16 +289,14 @@
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -385,9 +377,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_add_kernel_code_long,
-                                      "test_wg_reduce_add_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_add_kernel_code_long, "test_wg_reduce_add_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -399,16 +389,14 @@
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -490,9 +478,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_add_kernel_code_ulong,
-                                      "test_wg_reduce_add_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_add_kernel_code_ulong, "test_wg_reduce_add_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -504,16 +490,14 @@
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/workgroups/test_wg_reduce_max.cpp b/test_conformance/workgroups/test_wg_reduce_max.cpp
index 3bbd3f2..7f37b5a 100644
--- a/test_conformance/workgroups/test_wg_reduce_max.cpp
+++ b/test_conformance/workgroups/test_wg_reduce_max.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -177,9 +177,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_max_kernel_code_int,
-                                      "test_wg_reduce_max_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_max_kernel_code_int, "test_wg_reduce_max_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -199,16 +197,14 @@
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -291,9 +287,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_max_kernel_code_uint,
-                                      "test_wg_reduce_max_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_max_kernel_code_uint, "test_wg_reduce_max_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -313,16 +307,14 @@
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -404,9 +396,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_max_kernel_code_long,
-                                      "test_wg_reduce_max_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_max_kernel_code_long, "test_wg_reduce_max_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -426,16 +416,14 @@
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -518,9 +506,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_max_kernel_code_ulong,
-                                      "test_wg_reduce_max_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_max_kernel_code_ulong, "test_wg_reduce_max_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -540,16 +526,14 @@
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/workgroups/test_wg_reduce_min.cpp b/test_conformance/workgroups/test_wg_reduce_min.cpp
index 7b1b22e..9d929c8 100644
--- a/test_conformance/workgroups/test_wg_reduce_min.cpp
+++ b/test_conformance/workgroups/test_wg_reduce_min.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -177,9 +177,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_min_kernel_code_int,
-                                      "test_wg_reduce_min_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_int, "test_wg_reduce_min_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -199,16 +197,14 @@
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -291,9 +287,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_min_kernel_code_uint,
-                                      "test_wg_reduce_min_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_uint, "test_wg_reduce_min_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -313,16 +307,14 @@
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -404,9 +396,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_min_kernel_code_long,
-                                      "test_wg_reduce_min_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_long, "test_wg_reduce_min_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -426,16 +416,14 @@
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -518,9 +506,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_reduce_min_kernel_code_ulong,
-                                      "test_wg_reduce_min_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_ulong, "test_wg_reduce_min_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -540,16 +526,14 @@
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_add.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_add.cpp
index e695a16..40c50c8 100644
--- a/test_conformance/workgroups/test_wg_scan_exclusive_add.cpp
+++ b/test_conformance/workgroups/test_wg_scan_exclusive_add.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -184,9 +184,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_add_kernel_code_int,
-                                      "test_wg_scan_exclusive_add_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_add_kernel_code_int, "test_wg_scan_exclusive_add_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -198,16 +196,14 @@
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -289,9 +285,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_add_kernel_code_uint,
-                                      "test_wg_scan_exclusive_add_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_add_kernel_code_uint, "test_wg_scan_exclusive_add_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -303,16 +297,14 @@
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -393,9 +385,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_add_kernel_code_long,
-                                      "test_wg_scan_exclusive_add_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_add_kernel_code_long, "test_wg_scan_exclusive_add_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -407,16 +397,14 @@
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -498,9 +486,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_add_kernel_code_ulong,
-                                      "test_wg_scan_exclusive_add_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_add_kernel_code_ulong, "test_wg_scan_exclusive_add_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -512,16 +498,14 @@
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp
index 12338b6..7f37acd 100644
--- a/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp
+++ b/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -176,9 +176,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_max_kernel_code_int,
-                                      "test_wg_scan_exclusive_max_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_max_kernel_code_int, "test_wg_scan_exclusive_max_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -198,16 +196,14 @@
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -290,9 +286,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_max_kernel_code_uint,
-                                      "test_wg_scan_exclusive_max_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_max_kernel_code_uint, "test_wg_scan_exclusive_max_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -312,16 +306,14 @@
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -403,9 +395,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_max_kernel_code_long,
-                                      "test_wg_scan_exclusive_max_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_max_kernel_code_long, "test_wg_scan_exclusive_max_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -425,16 +415,14 @@
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -517,9 +505,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_max_kernel_code_ulong,
-                                      "test_wg_scan_exclusive_max_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_max_kernel_code_ulong, "test_wg_scan_exclusive_max_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -539,16 +525,14 @@
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp
index f4e6bf9..6111053 100644
--- a/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp
+++ b/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -177,9 +177,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_min_kernel_code_int,
-                                      "test_wg_scan_exclusive_min_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_min_kernel_code_int, "test_wg_scan_exclusive_min_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -199,16 +197,14 @@
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -291,9 +287,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_min_kernel_code_uint,
-                                      "test_wg_scan_exclusive_min_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_min_kernel_code_uint, "test_wg_scan_exclusive_min_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -313,16 +307,14 @@
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -404,9 +396,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_min_kernel_code_long,
-                                      "test_wg_scan_exclusive_min_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_min_kernel_code_long, "test_wg_scan_exclusive_min_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -426,16 +416,14 @@
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -518,9 +506,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_exclusive_min_kernel_code_ulong,
-                                      "test_wg_scan_exclusive_min_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_min_kernel_code_ulong, "test_wg_scan_exclusive_min_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -540,16 +526,14 @@
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_add.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_add.cpp
index 51c98a4..9546794 100644
--- a/test_conformance/workgroups/test_wg_scan_inclusive_add.cpp
+++ b/test_conformance/workgroups/test_wg_scan_inclusive_add.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -173,9 +173,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_add_kernel_code_int,
-                                      "test_wg_scan_inclusive_add_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_add_kernel_code_int, "test_wg_scan_inclusive_add_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -187,16 +185,14 @@
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -278,9 +274,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_add_kernel_code_uint,
-                                      "test_wg_scan_inclusive_add_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_add_kernel_code_uint, "test_wg_scan_inclusive_add_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -292,16 +286,14 @@
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -382,9 +374,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_add_kernel_code_long,
-                                      "test_wg_scan_inclusive_add_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_add_kernel_code_long, "test_wg_scan_inclusive_add_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -396,16 +386,14 @@
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -487,9 +475,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_add_kernel_code_ulong,
-                                      "test_wg_scan_inclusive_add_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_add_kernel_code_ulong, "test_wg_scan_inclusive_add_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -501,16 +487,14 @@
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp
index 44ebf80..23d518c 100644
--- a/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp
+++ b/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -175,9 +175,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_max_kernel_code_int,
-                                      "test_wg_scan_inclusive_max_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_max_kernel_code_int, "test_wg_scan_inclusive_max_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -189,16 +187,14 @@
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -280,9 +276,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_max_kernel_code_uint,
-                                      "test_wg_scan_inclusive_max_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_max_kernel_code_uint, "test_wg_scan_inclusive_max_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -294,16 +288,14 @@
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -384,9 +376,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_max_kernel_code_long,
-                                      "test_wg_scan_inclusive_max_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_max_kernel_code_long, "test_wg_scan_inclusive_max_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -398,16 +388,14 @@
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -489,9 +477,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_max_kernel_code_ulong,
-                                      "test_wg_scan_inclusive_max_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_max_kernel_code_ulong, "test_wg_scan_inclusive_max_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -503,16 +489,14 @@
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp
index f2f0578..f4c788f 100644
--- a/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp
+++ b/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp
@@ -1,6 +1,6 @@
 // Copyright (c) 2017 The Khronos Group Inc.
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -175,9 +175,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_min_kernel_code_int,
-                                      "test_wg_scan_inclusive_min_int");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_min_kernel_code_int, "test_wg_scan_inclusive_min_int", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -189,16 +187,14 @@
     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_int) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -280,9 +276,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_min_kernel_code_uint,
-                                      "test_wg_scan_inclusive_min_uint");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_min_kernel_code_uint, "test_wg_scan_inclusive_min_uint", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -294,16 +288,14 @@
     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_uint) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -384,9 +376,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_min_kernel_code_long,
-                                      "test_wg_scan_inclusive_min_long");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_min_kernel_code_long, "test_wg_scan_inclusive_min_long", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -398,16 +388,14 @@
     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_long) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
@@ -489,9 +477,7 @@
     int          i;
     MTdata       d;
-    err = create_single_kernel_helper(context, &program, &kernel, 1,
-                                      &wg_scan_inclusive_min_kernel_code_ulong,
-                                      "test_wg_scan_inclusive_min_ulong");
+    err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_min_kernel_code_ulong, "test_wg_scan_inclusive_min_ulong", "-cl-std=CL2.0" );
     if (err)
         return -1;
@@ -503,16 +489,14 @@
     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[0])
         log_error("clCreateBuffer failed\n");
         return -1;
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_ulong) * num_elements, NULL, NULL);
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
     if (!streams[1])
         log_error("clCreateBuffer failed\n");
diff --git a/test_extensions/CMakeLists.txt b/test_extensions/CMakeLists.txt
new file mode 100644
index 0000000..3c48e18
--- /dev/null
+++ b/test_extensions/CMakeLists.txt
@@ -0,0 +1,2 @@
+set(HARNESS_LIB harness)
+add_subdirectory( media_sharing )
diff --git a/test_extensions/media_sharing/CMakeLists.txt b/test_extensions/media_sharing/CMakeLists.txt
new file mode 100644
index 0000000..9fdde1c
--- /dev/null
+++ b/test_extensions/media_sharing/CMakeLists.txt
@@ -0,0 +1,25 @@
+list(APPEND CLConform_LIBRARIES d3d9.lib dxva2.lib )
+        main.cpp
+        wrappers.cpp
+        utils.cpp
+        test_create_context.cpp
+        test_functions_api.cpp
+        test_functions_kernel.cpp
+        test_get_device_ids.cpp
+        test_interop_sync.cpp
+        test_memory_access.cpp
+        test_other_data_types.cpp
+    )
diff --git a/test_extensions/media_sharing/main.cpp b/test_extensions/media_sharing/main.cpp
new file mode 100644
index 0000000..98b766a
--- /dev/null
+++ b/test_extensions/media_sharing/main.cpp
@@ -0,0 +1,204 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <stdio.h>
+#include <stdlib.h>
+#include "harness/testHarness.h"
+#include "utils.h"
+#include "procs.h"
+test_definition test_list[] = {
+ADD_TEST( context_create ),
+ADD_TEST( get_device_ids ),
+ADD_TEST( api ),
+ADD_TEST( kernel ),
+ADD_TEST( other_data_types ),
+ADD_TEST( memory_access ),
+ADD_TEST( interop_user_sync )
+const int test_num = ARRAY_SIZE(test_list);
+clGetDeviceIDsFromDX9MediaAdapterKHR_fn clGetDeviceIDsFromDX9MediaAdapterKHR = NULL;
+clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR = NULL;
+clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR = NULL;
+clEnqueueReleaseDX9MediaSurfacesKHR_fn clEnqueueReleaseDX9MediaSurfacesKHR = NULL;
+cl_platform_id gPlatformIDdetected;
+cl_device_id gDeviceIDdetected;
+cl_device_type gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT;
+bool MediaSurfaceSharingExtensionInit()
+  clGetDeviceIDsFromDX9MediaAdapterKHR = (clGetDeviceIDsFromDX9MediaAdapterKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clGetDeviceIDsFromDX9MediaAdapterKHR");
+  if (clGetDeviceIDsFromDX9MediaAdapterKHR == NULL)
+  {
+    log_error("clGetExtensionFunctionAddressForPlatform(clGetDeviceIDsFromDX9MediaAdapterKHR) returned NULL.\n");
+    return false;
+  }
+  clCreateFromDX9MediaSurfaceKHR = (clCreateFromDX9MediaSurfaceKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clCreateFromDX9MediaSurfaceKHR");
+  if (clCreateFromDX9MediaSurfaceKHR == NULL)
+  {
+    log_error("clGetExtensionFunctionAddressForPlatform(clCreateFromDX9MediaSurfaceKHR) returned NULL.\n");
+    return false;
+  }
+  clEnqueueAcquireDX9MediaSurfacesKHR = (clEnqueueAcquireDX9MediaSurfacesKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clEnqueueAcquireDX9MediaSurfacesKHR");
+  if (clEnqueueAcquireDX9MediaSurfacesKHR == NULL)
+  {
+    log_error("clGetExtensionFunctionAddressForPlatform(clEnqueueAcquireDX9MediaSurfacesKHR) returned NULL.\n");
+    return false;
+  }
+  clEnqueueReleaseDX9MediaSurfacesKHR = (clEnqueueReleaseDX9MediaSurfacesKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clEnqueueReleaseDX9MediaSurfacesKHR");
+  if (clEnqueueReleaseDX9MediaSurfacesKHR == NULL)
+  {
+    log_error("clGetExtensionFunctionAddressForPlatform(clEnqueueReleaseDX9MediaSurfacesKHR) returned NULL.\n");
+    return false;
+  }
+  return true;
+bool DetectPlatformAndDevice()
+  std::vector<cl_platform_id> platforms;
+  cl_uint platformsNum = 0;
+  cl_int error = clGetPlatformIDs(0, 0, &platformsNum);
+  if (error != CL_SUCCESS)
+  {
+    print_error(error, "clGetPlatformIDs failed\n");
+    return false;
+  }
+  platforms.resize(platformsNum);
+  error = clGetPlatformIDs(platformsNum, &platforms[0], 0);
+  if (error != CL_SUCCESS)
+  {
+    print_error(error, "clGetPlatformIDs failed\n");
+    return false;
+  }
+  bool found = false;
+  for (size_t i = 0; i < platformsNum; ++i)
+  {
+    std::vector<cl_device_id> devices;
+    cl_uint devicesNum = 0;
+    error = clGetDeviceIDs(platforms[i], gDeviceTypeSelected, 0, 0, &devicesNum);
+    if (error != CL_SUCCESS)
+    {
+      print_error(error, "clGetDeviceIDs failed\n");
+      return false;
+    }
+    devices.resize(devicesNum);
+    error = clGetDeviceIDs(platforms[i], gDeviceTypeSelected, devicesNum, &devices[0], 0);
+    if (error != CL_SUCCESS)
+    {
+      print_error(error, "clGetDeviceIDs failed\n");
+      return false;
+    }
+    for (size_t j = 0; j < devicesNum; ++j)
+    {
+      if (is_extension_available(devices[j], "cl_khr_dx9_media_sharing"))
+      {
+        gPlatformIDdetected = platforms[i];
+        gDeviceIDdetected = devices[j];
+        found = true;
+        break;
+      }
+    }
+  }
+  if (!found)
+  {
+    log_info("Test was not run, because the media surface sharing extension is not supported for any devices.\n");
+    return false;
+  }
+  return true;
+bool CmdlineParse(int argc, const char *argv[])
+  char *env_mode = getenv( "CL_DEVICE_TYPE" );
+  if( env_mode != NULL )
+  {
+    if(strcmp(env_mode, "gpu") == 0 || strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0)
+      gDeviceTypeSelected = CL_DEVICE_TYPE_GPU;
+    else if(strcmp(env_mode, "cpu") == 0 || strcmp(env_mode, "CL_DEVICE_TYPE_CPU") == 0)
+      gDeviceTypeSelected = CL_DEVICE_TYPE_CPU;
+    else if(strcmp(env_mode, "accelerator") == 0 || strcmp(env_mode, "CL_DEVICE_TYPE_ACCELERATOR") == 0)
+      gDeviceTypeSelected = CL_DEVICE_TYPE_ACCELERATOR;
+    else if(strcmp(env_mode, "default") == 0 || strcmp(env_mode, "CL_DEVICE_TYPE_DEFAULT") == 0)
+      gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT;
+    else
+    {
+      log_error("Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode);
+      return false;
+    }
+  }
+  for (int i = 0; i < argc; ++i)
+  {
+    if(strcmp(argv[i], "gpu") == 0 || strcmp(argv[i], "CL_DEVICE_TYPE_GPU") == 0)
+    {
+      gDeviceTypeSelected = CL_DEVICE_TYPE_GPU;
+      continue;
+    }
+    else if(strcmp( argv[i], "cpu") == 0 || strcmp(argv[i], "CL_DEVICE_TYPE_CPU") == 0)
+    {
+      gDeviceTypeSelected = CL_DEVICE_TYPE_CPU;
+      continue;
+    }
+    else if(strcmp( argv[i], "accelerator") == 0 || strcmp(argv[i], "CL_DEVICE_TYPE_ACCELERATOR") == 0)
+    {
+      gDeviceTypeSelected = CL_DEVICE_TYPE_ACCELERATOR;
+      continue;
+    }
+    else if(strcmp(argv[i], "CL_DEVICE_TYPE_DEFAULT") == 0)
+    {
+      gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT;
+      continue;
+    }
+    else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0)
+    {
+      CDeviceWrapper::AccelerationType(CDeviceWrapper::ACCELERATION_SW);
+    }
+  }
+  return true;
+int main(int argc, const char *argv[])
+  if (!CmdlineParse(argc, argv))
+    return TEST_FAIL;
+  if (!DetectPlatformAndDevice())
+  {
+    log_info("Test was not run, because the media surface sharing extension is not supported\n");
+    return TEST_SKIP;
+  }
+  if (!MediaSurfaceSharingExtensionInit())
+    return TEST_FAIL;
+  return runTestHarness(argc, argv, test_num, test_list, false, true, 0);
diff --git a/test_extensions/media_sharing/procs.h b/test_extensions/media_sharing/procs.h
new file mode 100644
index 0000000..6b57799
--- /dev/null
+++ b/test_extensions/media_sharing/procs.h
@@ -0,0 +1,31 @@
+// Copyright (c) 2019 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+extern int test_context_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_memory_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+#endif    // #ifndef __MEDIA_SHARING_PROCS_H__ 
\ No newline at end of file
diff --git a/test_extensions/media_sharing/test_create_context.cpp b/test_extensions/media_sharing/test_create_context.cpp
new file mode 100644
index 0000000..5637bc5
--- /dev/null
+++ b/test_extensions/media_sharing/test_create_context.cpp
@@ -0,0 +1,318 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "utils.h"
+int context_create(cl_device_id deviceID, cl_context context, cl_command_queue queue,
+                   int num_elements, unsigned int width, unsigned int height,
+                   TContextFuncType functionCreate, cl_dx9_media_adapter_type_khr adapterType,
+                   TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle)
+  CResult result;
+  //create device
+  std::auto_ptr<CDeviceWrapper> deviceWrapper;
+  if (!DeviceCreate(adapterType, deviceWrapper))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+  //generate input data
+  std::vector<cl_uchar> bufferIn(width * height * 3 / 2, 0);
+  if(!YUVGenerate(surfaceFormat, bufferIn, width, height, 0, 255))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+  while (deviceWrapper->AdapterNext())
+  {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+    if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
+    {
+      std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
+      std::string formatStr;
+      std::string adapterStr;
+      SurfaceFormatToString(surfaceFormat, formatStr);
+      AdapterToString(adapterType, adapterStr);
+      log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n",
+        adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
+      return result.Result();
+    }
+    void *objectSharedHandle = 0;
+    std::auto_ptr<CSurfaceWrapper> surface;
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSharedHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+    cl_context_properties contextProperties[] = {
+      CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected,
+      AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(),
+      0,
+    };
+    clContextWrapper ctx;
+    switch(functionCreate)
+    {
+      ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
+      break;
+      ctx = clCreateContextFromType(&contextProperties[0], gDeviceTypeSelected, NULL, NULL, &error);
+      break;
+    default:
+      log_error("Unknown context creation function enum\n");
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+      break;
+    }
+    if (error != CL_SUCCESS)
+    {
+      std::string functionName;
+      FunctionContextCreateToString(functionCreate, functionName);
+      log_error("%s failed: %s\n", functionName.c_str(), IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    if (!YUVSurfaceSet(surfaceFormat, surface, bufferIn, width, height))
+    {
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+#if defined(_WIN32)
+    cl_dx9_surface_info_khr surfaceInfo;
+    surfaceInfo.resource = *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
+    surfaceInfo.shared_handle = objectSharedHandle;
+    void *surfaceInfo = 0;
+    std::vector<cl_mem> memObjList;
+    unsigned int planesNum = PlanesNum(surfaceFormat);
+    std::vector<clMemWrapper> planesList(planesNum);
+    for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
+    {
+      planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      memObjList.push_back(planesList[planeIdx]);
+    }
+    clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error );
+    if (error != CL_SUCCESS)
+    {
+      log_error("Unable to create command queue: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    if (!ImageInfoVerify(adapterType, memObjList, width, height, surface, objectSharedHandle))
+    {
+      log_error("Image info verification failed\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    cl_event event;
+    error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()),
+      &, 0, NULL, &event);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    cl_uint eventType = 0;
+    error = clGetEventInfo( event, CL_EVENT_COMMAND_TYPE, sizeof(eventType), &eventType, NULL);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clGetEventInfo failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    {
+      log_error("Invalid event != CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    clReleaseEvent(event);
+    size_t origin[3] = {0,0,0};
+    size_t offset = 0;
+    size_t frameSize = width * height * 3 / 2;
+    std::vector<cl_uchar> out( frameSize, 0 );
+    for (size_t i = 0; i < memObjList.size(); ++i)
+    {
+      size_t planeWidth = (i == 0) ? width: width / 2;
+      size_t planeHeight = (i == 0) ? height: height / 2;
+      size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+      error = clEnqueueReadImage(cmdQueue,, CL_TRUE, origin, regionPlane, 0, 0, &, 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+      offset += planeWidth * planeHeight;
+    }
+    if (!YUVCompare(surfaceFormat, out, bufferIn, width, height))
+    {
+      log_error("OCL object verification failed - clEnqueueReadImage\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()),
+      &, 0, NULL, &event);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    eventType = 0;
+    error = clGetEventInfo( event, CL_EVENT_COMMAND_TYPE, sizeof(eventType), &eventType, NULL);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clGetEventInfo failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    {
+      log_error("Invalid event != CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    clReleaseEvent(event);
+    //object verification
+    std::vector<cl_uchar> bufferOut(frameSize, 0);
+    if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height))
+    {
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    if (!YUVCompare(surfaceFormat, bufferOut, bufferIn, width, height))
+    {
+      log_error("Media surface is different than expected\n");
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+  }
+  if (deviceWrapper->Status() != DEVICE_PASS)
+  {
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+    {
+    log_error("%s init failed\n", adapterName.c_str());
+    result.ResultSub(CResult::TEST_FAIL);
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
+  }
+  return result.Result();
+int test_context_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+  const unsigned int WIDTH = 256;
+  const unsigned int HEIGHT = 256;
+  std::vector<cl_dx9_media_adapter_type_khr> adapterTypes;
+#if defined(_WIN32)
+  adapterTypes.push_back(CL_ADAPTER_D3D9_KHR);
+  adapterTypes.push_back(CL_ADAPTER_D3D9EX_KHR);
+  adapterTypes.push_back(CL_ADAPTER_DXVA_KHR);
+  std::vector<TContextFuncType> contextFuncs;
+  contextFuncs.push_back(CONTEXT_CREATE_DEFAULT);
+  contextFuncs.push_back(CONTEXT_CREATE_FROM_TYPE);
+  std::vector<TSurfaceFormat> formats;
+  formats.push_back(SURFACE_FORMAT_NV12);
+  formats.push_back(SURFACE_FORMAT_YV12);
+  std::vector<TSharedHandleType> sharedHandleTypes;
+  sharedHandleTypes.push_back(SHARED_HANDLE_DISABLED);
+#if defined(_WIN32)
+  sharedHandleTypes.push_back(SHARED_HANDLE_ENABLED);
+  CResult result;
+  for (size_t adapterTypeIdx = 0; adapterTypeIdx < adapterTypes.size(); ++adapterTypeIdx)
+  {
+    //iteration through all create context functions
+    for (size_t contextFuncIdx = 0; contextFuncIdx < contextFuncs.size(); ++contextFuncIdx)
+    {
+      //iteration through surface formats
+      for (size_t formatIdx = 0; formatIdx < formats.size(); ++formatIdx)
+      {
+        //shared handle enabled or disabled
+        for (size_t sharedHandleIdx = 0; sharedHandleIdx < sharedHandleTypes.size(); ++sharedHandleIdx)
+        {
+          if (adapterTypes[adapterTypeIdx] == CL_ADAPTER_D3D9_KHR && sharedHandleTypes[sharedHandleIdx] == SHARED_HANDLE_ENABLED)
+            continue;
+          if(context_create(deviceID, context, queue, num_elements, WIDTH, HEIGHT,
+            contextFuncs[contextFuncIdx], adapterTypes[adapterTypeIdx], formats[formatIdx],
+            sharedHandleTypes[sharedHandleIdx]) != 0)
+          {
+            std::string sharedHandle = (sharedHandleTypes[sharedHandleIdx] == SHARED_HANDLE_ENABLED)? "shared handle": "no shared handle";
+            std::string formatStr;
+            std::string adapterTypeStr;
+            SurfaceFormatToString(formats[formatIdx], formatStr);
+            AdapterToString(adapterTypes[adapterTypeIdx], adapterTypeStr);
+            log_error("\nTest case - clCreateContext (%s, %s, %s) failed\n\n", adapterTypeStr.c_str(), formatStr.c_str(), sharedHandle.c_str());
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+        }
+      }
+    }
+  }
+  return result.Result();
diff --git a/test_extensions/media_sharing/test_functions_api.cpp b/test_extensions/media_sharing/test_functions_api.cpp
new file mode 100644
index 0000000..cdc6ce8
--- /dev/null
+++ b/test_extensions/media_sharing/test_functions_api.cpp
@@ -0,0 +1,617 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "utils.h"
+int api_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements,
+                  unsigned int iterationNum, unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType,
+                  TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle)
+  const unsigned int FRAME_NUM = 2;
+  const cl_uchar MAX_VALUE = 255 / 2;
+  CResult result;
+  //create device
+  std::auto_ptr<CDeviceWrapper> deviceWrapper;
+  if (!DeviceCreate(adapterType, deviceWrapper))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+  //generate input and expected data
+  std::vector<std::vector<cl_uchar> > bufferRef1(FRAME_NUM);
+  std::vector<std::vector<cl_uchar> > bufferRef2(FRAME_NUM);
+  std::vector<std::vector<cl_uchar> > bufferRef3(FRAME_NUM);
+  size_t frameSize = width * height * 3 / 2;
+  cl_uchar step = MAX_VALUE / FRAME_NUM;
+  for (size_t i = 0; i < FRAME_NUM; ++i)
+  {
+    if (!YUVGenerate(surfaceFormat, bufferRef1[i], width, height, static_cast<cl_uchar>(step * i), static_cast<cl_uchar>(step * (i + 1))) ||
+        !YUVGenerate(surfaceFormat, bufferRef2[i], width, height, static_cast<cl_uchar>(step * i), static_cast<cl_uchar>(step * (i + 1)), 0.2) ||
+        !YUVGenerate(surfaceFormat, bufferRef3[i], width, height, static_cast<cl_uchar>(step * i), static_cast<cl_uchar>(step * (i + 1)), 0.4))
+    {
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+  }
+  //iterates through all devices
+  while (deviceWrapper->AdapterNext())
+  {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+    if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
+    {
+      std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
+      std::string formatStr;
+      std::string adapterStr;
+      SurfaceFormatToString(surfaceFormat, formatStr);
+      AdapterToString(adapterType, adapterStr);
+      log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n",
+        adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
+      return result.Result();
+    }
+    void *objectSharedHandle = 0;
+    std::auto_ptr<CSurfaceWrapper> surface;
+    //create surface
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSharedHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+    cl_context_properties contextProperties[] = {
+      CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected,
+      AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(),
+      0,
+    };
+    clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clCreateContext failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+#if defined(_WIN32)
+    cl_dx9_surface_info_khr surfaceInfo;
+    surfaceInfo.resource = *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
+    surfaceInfo.shared_handle = objectSharedHandle;
+    void *surfaceInfo = 0;
+    std::vector<cl_mem> memObjList;
+    unsigned int planesNum = PlanesNum(surfaceFormat);
+    std::vector<clMemWrapper> planesList(planesNum);
+    for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
+    {
+      planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      memObjList.push_back(planesList[planeIdx]);
+    }
+    clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error );
+    if (error != CL_SUCCESS)
+    {
+      log_error("Unable to create command queue: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    if (!ImageInfoVerify(adapterType, memObjList, width, height, surface, objectSharedHandle))
+    {
+      log_error("Image info verification failed\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx)
+    {
+      if (!YUVSurfaceSet(surfaceFormat, surface, bufferRef1[frameIdx % FRAME_NUM], width, height))
+      {
+        result.ResultSub(CResult::TEST_ERROR);
+        return result.Result();
+      }
+      error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      { //read operation
+        std::vector<cl_uchar> out( frameSize, 0 );
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+          error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0,
+            &out[offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          offset += planeWidth * planeHeight;
+        }
+        if (!YUVCompare(surfaceFormat, out, bufferRef1[frameIdx % FRAME_NUM], width, height))
+        {
+          log_error("Frame idx: %i, OCL image is different then shared OCL object: clEnqueueReadImage\n", frameIdx);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+      { //write operation
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+          error = clEnqueueWriteImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane,
+            0, 0, &bufferRef2[frameIdx % FRAME_NUM][offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          offset += planeWidth * planeHeight;
+        }
+      }
+      { //read operation
+        std::vector<cl_uchar> out( frameSize, 0 );
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+          error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0,
+            &out[offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          offset += planeWidth * planeHeight;
+        }
+        if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height))
+        {
+          log_error("Frame idx: %i, Shared OCL image verification after clEnqueueWriteImage failed\n", frameIdx);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+      { //copy operation (shared OCL to OCL)
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        std::vector<cl_uchar> out( frameSize, 0 );
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+          cl_image_format formatPlane;
+          formatPlane.image_channel_data_type = CL_UNORM_INT8;
+          formatPlane.image_channel_order = (surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? CL_RG: CL_R;
+          cl_image_desc imageDesc = {0};
+          imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
+          imageDesc.image_width = planeWidth;
+          imageDesc.image_height = planeHeight;
+          clMemWrapper planeOCL = clCreateImage(ctx, CL_MEM_READ_WRITE, &formatPlane, &imageDesc, 0, &error);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clCreateImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          error = clEnqueueCopyImage(cmdQueue, memObjList[i], planeOCL, origin, origin, regionPlane, 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueCopyImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          error = clEnqueueReadImage(cmdQueue, planeOCL, CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          offset += planeWidth * planeHeight;
+        }
+        if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height))
+        {
+          log_error("Frame idx: %i, OCL image verification after clEnqueueCopyImage (from shared OCL to OCL) failed\n", frameIdx);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+      { //copy operation (OCL to shared OCL)
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        std::vector<cl_uchar> out( frameSize, 0 );
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+          size_t pitchSize = ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? width: planeWidth) * sizeof(cl_uchar);
+          cl_image_format formatPlane;
+          formatPlane.image_channel_data_type = CL_UNORM_INT8;
+          formatPlane.image_channel_order = (surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? CL_RG: CL_R;
+          cl_image_desc imageDesc = {0};
+          imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
+          imageDesc.image_width = planeWidth;
+          imageDesc.image_height = planeHeight;
+          imageDesc.image_row_pitch = pitchSize;
+          clMemWrapper planeOCL = clCreateImage(ctx, CL_MEM_COPY_HOST_PTR, &formatPlane, &imageDesc, &bufferRef1[frameIdx % FRAME_NUM][offset], &error);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clCreateImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          error = clEnqueueCopyImage(cmdQueue, planeOCL, memObjList[i], origin, origin, regionPlane, 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueCopyImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          offset += planeWidth * planeHeight;
+        }
+        if (!YUVCompare(surfaceFormat, out, bufferRef1[frameIdx % FRAME_NUM], width, height))
+        {
+          log_error("Frame idx: %i, OCL image verification after clEnqueueCopyImage (from OCL to shared OCL) failed\n", frameIdx);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+      { //copy from image to buffer
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        size_t bufferSize = sizeof(cl_uchar) * frameSize;
+        clMemWrapper buffer = clCreateBuffer( ctx, CL_MEM_READ_WRITE, bufferSize, NULL, &error);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clCreateBuffer failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+          error = clEnqueueCopyImageToBuffer(cmdQueue, memObjList[i], buffer, origin, regionPlane, offset, 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueCopyImageToBuffer failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          offset += planeWidth * planeHeight * sizeof(cl_uchar);
+        }
+        std::vector<cl_uchar> out( frameSize, 0 );
+        error = clEnqueueReadBuffer( cmdQueue, buffer, CL_TRUE, 0, bufferSize, &out[0], 0, NULL, NULL );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to read buffer");
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        if (!YUVCompare(surfaceFormat, out, bufferRef1[frameIdx % FRAME_NUM], width, height))
+        {
+          log_error("Frame idx: %i, OCL buffer verification after clEnqueueCopyImageToBuffer (from shared OCL image to OCL buffer) failed\n", frameIdx);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+      { //copy buffer to image
+        size_t bufferSize = sizeof(cl_uchar) * frameSize;
+        clMemWrapper buffer = clCreateBuffer( ctx, CL_MEM_COPY_HOST_PTR, bufferSize, &bufferRef2[frameIdx % FRAME_NUM][0], &error);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clCreateBuffer failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        std::vector<cl_uchar> out( frameSize, 0 );
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+          error = clEnqueueCopyBufferToImage(cmdQueue, buffer, memObjList[i], offset, origin, regionPlane, 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueCopyBufferToImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          offset += planeWidth * planeHeight * sizeof(cl_uchar);
+        }
+        if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height))
+        {
+          log_error("Frame idx: %i, OCL image verification after clEnqueueCopyBufferToImage (from OCL buffer to shared OCL image) failed\n", frameIdx);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+      { //map operation to read
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        std::vector<cl_uchar> out( frameSize, 0 );
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+          size_t pitchSize = ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? width: planeWidth);
+          size_t rowPitch = 0;
+          size_t slicePitch = 0;
+          void *mapPtr = clEnqueueMapImage(cmdQueue, memObjList[i], CL_TRUE, CL_MAP_READ, origin, regionPlane,
+            &rowPitch, &slicePitch, 0, 0, 0, &error);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueMapImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          for (size_t y = 0; y < planeHeight; ++y)
+            memcpy(&out[offset + y * pitchSize], static_cast<cl_uchar *>(mapPtr) + y * rowPitch / sizeof(cl_uchar), pitchSize * sizeof(cl_uchar));
+          error = clEnqueueUnmapMemObject(cmdQueue, memObjList[i], mapPtr, 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueUnmapMemObject failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          offset += pitchSize * planeHeight;
+        }
+        if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height))
+        {
+          log_error("Frame idx: %i, Mapped shared OCL image is different then expected\n", frameIdx);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+      { //map operation to write
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+          size_t pitchSize = ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? width: planeWidth);
+          size_t rowPitch = 0;
+          size_t slicePitch = 0;
+          void *mapPtr = clEnqueueMapImage(cmdQueue, memObjList[i], CL_TRUE, CL_MAP_WRITE, origin, regionPlane,
+            &rowPitch, &slicePitch, 0, 0, 0, &error);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueMapImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          for (size_t y = 0; y < planeHeight; ++y)
+            memcpy(static_cast<cl_uchar *>(mapPtr) + y * rowPitch / sizeof(cl_uchar), &bufferRef3[frameIdx % FRAME_NUM][offset + y * pitchSize], pitchSize * sizeof(cl_uchar));
+          error = clEnqueueUnmapMemObject(cmdQueue, memObjList[i], mapPtr, 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueUnmapMemObject failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          offset += pitchSize * planeHeight;
+        }
+      }
+      error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+      std::vector<cl_uchar> bufferOut(frameSize, 0);
+      if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height))
+      {
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      if (!YUVCompare(surfaceFormat, bufferOut, bufferRef3[frameIdx % FRAME_NUM], width, height))
+      {
+        log_error("Frame idx: %i, media surface is different than expected\n", frameIdx);
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+  }
+  if (deviceWrapper->Status() != DEVICE_PASS)
+  {
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+    {
+      log_error("%s init failed\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
+  }
+  return result.Result();
+int test_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) // Test entry point: calls api_functions once per adapter/format/shared-handle case and returns the combined status.
+  CResult result; // accumulates the outcome of every test case below
+#if defined(_WIN32)
+  //D3D9
+  if(api_functions(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9_KHR, // NOTE(review): the trailing arguments and the end of this condition are not visible in this excerpt; the three literals are presumably iteration count, width, height - confirm against api_functions
+  {
+    log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL); // record the failure and keep running the remaining cases
+  }
+  if(api_functions(deviceID, context, queue, num_elements, 3, 512, 256, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  //D3D9EX
+  if(api_functions(deviceID, context, queue, num_elements, 5, 256, 512, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(api_functions(deviceID, context, queue, num_elements, 7, 512, 256, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(api_functions(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(api_functions(deviceID, context, queue, num_elements, 15, 128, 128, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  //DXVA
+  if(api_functions(deviceID, context, queue, num_elements, 20, 128, 128, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(api_functions(deviceID, context, queue, num_elements, 40, 64, 64, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(api_functions(deviceID, context, queue, num_elements, 5, 512, 512, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(api_functions(deviceID, context, queue, num_elements, 2, 1024, 1024, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  return result.Result(); // combined status of all cases recorded above
diff --git a/test_extensions/media_sharing/test_functions_kernel.cpp b/test_extensions/media_sharing/test_functions_kernel.cpp
new file mode 100644
index 0000000..f5c3e2d
--- /dev/null
+++ b/test_extensions/media_sharing/test_functions_kernel.cpp
@@ -0,0 +1,446 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <stdio.h>
+#include <stdlib.h>
+#include "harness/errorHelpers.h"
+#include "harness/kernelHelpers.h"
+#include "utils.h"
+int kernel_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, // Runs an image kernel from a shared source media surface into a shared destination surface and verifies the output, for each adapter.
+                     unsigned int iterationNum, unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType, // iterationNum: frames processed per adapter; width/height: surface dimensions
+                     TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle) // returns result.Result(), the status accumulated through result.ResultSub()
+  const unsigned int FRAME_NUM = 2; // number of distinct input/expected frame buffers; frames cycle through them via frameIdx % FRAME_NUM
+  const cl_uchar MAX_VALUE = 255 / 2; // bound from which the per-frame value step passed to YUVGenerate is derived
+  const std::string PROGRAM_STR = // OpenCL C source: reads planeIn with int and float coordinates, adds 0.2f, writes planeOut, and work-item (0,0) stores the queried image dimensions in planeRes
+    "__kernel void TestFunction( read_only image2d_t planeIn, write_only image2d_t planeOut, "
+    NL "                            sampler_t sampler, __global int *planeRes)"
+    NL "{"
+    NL "  int w = get_global_id(0);"
+    NL "  int h = get_global_id(1);"
+    NL "  int width = get_image_width(planeIn);"
+    NL "  int height = get_image_height(planeOut);"
+    NL "  float4 color0 = read_imagef(planeIn, sampler, (int2)(w,h)) + 0.2f;"
+    NL "  float4 color1 = read_imagef(planeIn, sampler, (float2)(w,h)) + 0.2f;"
+    NL "  color0 = (color0 == color1) ? color0: (float4)(0.5, 0.5, 0.5, 0.5);"
+    NL "  write_imagef(planeOut, (int2)(w,h), color0);"
+    NL "  if(w == 0 && h == 0)"
+    NL "  {"
+    NL "    planeRes[0] = width;"
+    NL "    planeRes[1] = height;"
+    NL "  }"
+    NL "}";
+  CResult result; // accumulates failures; several checks below record a failure and continue instead of returning
+  std::auto_ptr<CDeviceWrapper> deviceWrapper;
+  if (!DeviceCreate(adapterType, deviceWrapper))
+  {
+    result.ResultSub(CResult::TEST_ERROR); // no device wrapper: report an error and stop
+    return result.Result();
+  }
+  std::vector<std::vector<cl_uchar> > bufferIn(FRAME_NUM); // input frames
+  std::vector<std::vector<cl_uchar> > bufferExp(FRAME_NUM); // expected output frames
+  size_t frameSize = width * height * 3 / 2; // element count of the cl_uchar buffers that hold one full frame (all planes)
+  cl_uchar step = MAX_VALUE / FRAME_NUM; // each frame i is generated with the value arguments step*i and step*(i+1)
+  for (size_t i = 0; i < FRAME_NUM; ++i)
+  {
+    if (!YUVGenerate(surfaceFormat, bufferIn[i], width, height, static_cast<cl_uchar>(step * i), static_cast<cl_uchar>(step * (i + 1))) ||
+        !YUVGenerate(surfaceFormat, bufferExp[i], width, height, static_cast<cl_uchar>(step * i), static_cast<cl_uchar>(step * (i + 1)), 0.2)) // expected frame: same arguments plus a trailing 0.2 (presumably mirrors the kernel's +0.2f - confirm in YUVGenerate)
+    {
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+  }
+  while (deviceWrapper->AdapterNext()) // repeat the whole test until AdapterNext() returns false
+  {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result(); // result was passed to deviceExistForCLTest above; return whatever it now holds
+    }
+    if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) // NV12 bypasses the format support check
+    {
+      std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
+      std::string formatStr;
+      std::string adapterStr;
+      SurfaceFormatToString(surfaceFormat, formatStr);
+      AdapterToString(adapterType, adapterStr);
+      log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n",
+        adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
+      return result.Result(); // skip: returns the status accumulated so far, which also ends the adapter loop
+    }
+    void *objectSrcHandle = 0; // filled by MediaSurfaceCreate; later stored as the surface info shared_handle
+    std::auto_ptr<CSurfaceWrapper> surfaceSrc;
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceSrc,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSrcHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+    void *objectDstHandle = 0;
+    std::auto_ptr<CSurfaceWrapper> surfaceDst; // second surface of the same size/format, used as the kernel's output
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceDst,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectDstHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+    cl_context_properties contextProperties[] = { // zero-terminated property list: platform plus the media adapter device
+      CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected,
+      AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(),
+      0,
+    };
+    clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); // dedicated context for the single device gDeviceIDdetected; the 'context' parameter is not used here
+    if (error != CL_SUCCESS)
+    {
+      log_error("clCreateContext failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+#if defined(_WIN32)
+    cl_dx9_surface_info_khr surfaceInfoSrc;
+    surfaceInfoSrc.resource = *(static_cast<CD3D9SurfaceWrapper *>(surfaceSrc.get()));
+    surfaceInfoSrc.shared_handle = objectSrcHandle;
+    cl_dx9_surface_info_khr surfaceInfoDst;
+    surfaceInfoDst.resource = *(static_cast<CD3D9SurfaceWrapper *>(surfaceDst.get()));
+    surfaceInfoDst.shared_handle = objectDstHandle;
+    void *surfaceInfoSrc = 0; // NOTE(review): reuses the name declared above; the #else/#endif that presumably separates the two variants is not visible in this excerpt
+    void *surfaceInfoDst = 0;
+    std::vector<cl_mem> memObjSrcList; // raw handles handed to the acquire/release and kernel-argument calls
+    std::vector<cl_mem> memObjDstList;
+    unsigned int planesNum = PlanesNum(surfaceFormat);
+    std::vector<clMemWrapper> planeSrcList(planesNum); // wrapper objects holding the per-plane images created below
+    std::vector<clMemWrapper> planeDstList(planesNum);
+    for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) // create one CL image per plane for both surfaces
+    {
+      planeSrcList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfoSrc, planeIdx, &error);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      memObjSrcList.push_back(planeSrcList[planeIdx]);
+      planeDstList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfoDst, planeIdx, &error);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      memObjDstList.push_back(planeDstList[planeIdx]);
+    }
+    clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error ); // queue on the local context; the 'queue' parameter is not used here
+    if (error != CL_SUCCESS)
+    {
+      log_error("Unable to create command queue: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    if (!ImageInfoVerify(adapterType, memObjSrcList, width, height, surfaceSrc, objectSrcHandle)) // only the source plane list is verified here
+    {
+      log_error("Image info verification failed\n");
+      result.ResultSub(CResult::TEST_FAIL); // recorded, but the frame loop still runs
+    }
+    for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx)
+    {
+      if (!YUVSurfaceSet(surfaceFormat, surfaceSrc, bufferIn[frameIdx % FRAME_NUM], width, height)) // hand this iteration's input frame to YUVSurfaceSet for the source surface
+      {
+        result.ResultSub(CResult::TEST_ERROR);
+        return result.Result();
+      }
+      error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjSrcList.size()), &memObjSrcList[0], 0, 0, 0); // acquire source planes before any CL command uses them
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjDstList.size()), &memObjDstList[0], 0, 0, 0); // acquire destination planes as well
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      clSamplerWrapper sampler = clCreateSampler( ctx, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error ); // unnormalized coordinates, no addressing mode, nearest filtering; recreated every frame
+      if(error != CL_SUCCESS)
+      {
+        log_error("Unable to create sampler\n");
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+      clProgramWrapper program;
+      clKernelWrapper kernel;
+      const char *progPtr = PROGRAM_STR.c_str();
+      if(create_single_kernel_helper(ctx, &program, &kernel, 1, (const char **)&progPtr, "TestFunction")) // non-zero return is treated as failure; program and kernel are rebuilt every frame
+        result.ResultSub(CResult::TEST_FAIL);
+      size_t bufferSize = sizeof(cl_int) * 2; // room for the two ints the kernel writes: planeRes[0] and planeRes[1]
+      clMemWrapper imageRes = clCreateBuffer( ctx, CL_MEM_READ_WRITE, bufferSize, NULL, &error);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clCreateBuffer failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+      size_t offset = 0; // write position in 'out' for the plane currently being read back
+      size_t origin[3] = {0,0,0};
+      std::vector<cl_uchar> out( frameSize, 0 ); // collects all destination planes read back through OpenCL
+      for (size_t i = 0; i < memObjSrcList.size(); ++i) // run the kernel once per plane
+      {
+        size_t planeWidth = (i == 0) ? width: width / 2; // plane 0 is full size; every other plane is half width and half height
+        size_t planeHeight = (i == 0) ? height: height / 2;
+        size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+        size_t threads[ 2 ] = { planeWidth, planeHeight }; // global work size: one work-item per plane texel
+        error = clSetKernelArg( kernel, 0, sizeof( memObjSrcList[i] ), &memObjSrcList[i] ); // planeIn
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to set kernel arguments" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        error = clSetKernelArg( kernel, 1, sizeof( memObjDstList[i] ), &memObjDstList[i] ); // planeOut
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to set kernel arguments" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        error = clSetKernelArg( kernel, 2, sizeof( sampler ), &sampler ); // sampler
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to set kernel arguments" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        error = clSetKernelArg( kernel, 3, sizeof( imageRes ), &imageRes ); // planeRes
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to set kernel arguments" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        size_t localThreads[ 2 ];
+        error = get_max_common_2D_work_group_size( ctx, kernel, threads, localThreads ); // helper picks the local size used for the launch below
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to get work group size to use" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        error = clEnqueueNDRangeKernel( cmdQueue, kernel, 2, NULL, threads, localThreads, 0, NULL, NULL );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to execute test kernel" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        std::vector<cl_uint> imageResOut(2, 0);
+        error = clEnqueueReadBuffer( cmdQueue, imageRes, CL_TRUE, 0, bufferSize, &imageResOut[0], 0, NULL, NULL ); // blocking read of the dimensions reported by the kernel
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to read buffer");
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        if(imageResOut[0] != planeWidth) // kernel stored get_image_width(planeIn) here
+        {
+          log_error("Invalid width value, test = %i, expected = %i\n", imageResOut[0], planeWidth); // NOTE(review): planeWidth is size_t but is formatted with %i - confirm how log_error handles this on 64-bit targets
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        if(imageResOut[1] != planeHeight) // kernel stored get_image_height(planeOut) here
+        {
+          log_error("Invalid height value, test = %i, expected = %i\n", imageResOut[1], planeHeight);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        error = clEnqueueReadImage(cmdQueue, memObjDstList[i], CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0); // blocking read of the destination plane into the frame buffer
+        if (error != CL_SUCCESS)
+        {
+          log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        offset += planeWidth * planeHeight; // advance by the plane's texel count
+      }
+      if (!YUVCompare(surfaceFormat, out, bufferExp[frameIdx % FRAME_NUM], width, height)) // first check: data read back through OpenCL
+      {
+        log_error("Frame idx: %i, OCL objects are different than expected\n", frameIdx); // NOTE(review): frameIdx is size_t but is formatted with %i
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+      error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjSrcList.size()), &memObjSrcList[0], 0, 0, 0); // release both surfaces before the destination surface is read through YUVSurfaceGet
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+      error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjDstList.size()), &memObjDstList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+      std::vector<cl_uchar> bufferOut(frameSize, 0);
+      if (!YUVSurfaceGet(surfaceFormat, surfaceDst, bufferOut, width, height)) // fetch the destination surface contents via the surface wrapper
+      {
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      if (!YUVCompare(surfaceFormat, bufferOut, bufferExp[frameIdx % FRAME_NUM], width, height)) // second check: data as seen on the media surface itself
+      {
+        log_error("Frame idx: %i, media surface is different than expected\n", frameIdx);
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+  }
+  if (deviceWrapper->Status() != DEVICE_PASS) // after the adapter loop, report a device wrapper status other than DEVICE_PASS
+  {
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+  {
+      log_error("%s init failed\n", adapterName.c_str());
+    result.ResultSub(CResult::TEST_FAIL);
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED); // any status other than DEVICE_PASS/DEVICE_FAIL is reported as not supported
+    }
+  }
+  return result.Result();
+int test_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) // Test entry point: calls kernel_functions once per adapter/format/shared-handle case and returns the combined status.
+  CResult result; // accumulates the outcome of every test case below
+#if defined(_WIN32)
+  //D3D9
+  if(kernel_functions(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9_KHR, // the three literals are iterationNum, width, height; NOTE(review): the surface format / shared handle arguments and the end of this condition are not visible in this excerpt
+  {
+    log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL); // record the failure and keep running the remaining cases
+  }
+  if(kernel_functions(deviceID, context, queue, num_elements, 3, 256, 256, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  //D3D9EX
+  if(kernel_functions(deviceID, context, queue, num_elements, 5, 256, 512, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(kernel_functions(deviceID, context, queue, num_elements, 7, 512, 256, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(kernel_functions(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(kernel_functions(deviceID, context, queue, num_elements, 15, 128, 128, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  //DXVA
+  if(kernel_functions(deviceID, context, queue, num_elements, 20, 128, 128, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(kernel_functions(deviceID, context, queue, num_elements, 40, 64, 64, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(kernel_functions(deviceID, context, queue, num_elements, 5, 512, 512, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(kernel_functions(deviceID, context, queue, num_elements, 2, 1024, 1024, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  return result.Result(); // combined status of all cases recorded above
diff --git a/test_extensions/media_sharing/test_get_device_ids.cpp b/test_extensions/media_sharing/test_get_device_ids.cpp
new file mode 100644
index 0000000..f8947ea
--- /dev/null
+++ b/test_extensions/media_sharing/test_get_device_ids.cpp
@@ -0,0 +1,196 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "utils.h"
+// Cross-checks the device lists reported for DX9 media adapters of adapterType.
+// For each adapter the device wrapper yields, queries CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR
+// and CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR and checks that
+//   - every preferred device is contained in the adapter's all-devices list, and
+//   - every adapter device is contained in the platform's CL_DEVICE_TYPE_ALL list.
+// deviceID, context, queue and num_elements are not read by this function.
+// Returns result.Result(); sub-results recorded here are TEST_ERROR, TEST_FAIL and
+// TEST_NOTSUPPORTED.
+int get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements,
+                   cl_dx9_media_adapter_type_khr adapterType)
+{
+  CResult result;
+  std::auto_ptr<CDeviceWrapper> deviceWrapper;
+  if (!DeviceCreate(adapterType, deviceWrapper))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+  //reference list: every device of the detected platform (count query first, then the IDs)
+  cl_uint devicesExpectedNum = 0;
+  cl_int error = clGetDeviceIDs(gPlatformIDdetected, CL_DEVICE_TYPE_ALL, 0, 0, &devicesExpectedNum);
+  if (error != CL_SUCCESS || devicesExpectedNum < 1)
+  {
+    log_error("clGetDeviceIDs failed: %s\n", IGetErrorString(error));
+    result.ResultSub(CResult::TEST_FAIL);
+    return result.Result();
+  }
+  std::vector<cl_device_id> devicesExpected(devicesExpectedNum);
+  error = clGetDeviceIDs(gPlatformIDdetected, CL_DEVICE_TYPE_ALL, devicesExpectedNum, &devicesExpected[0], 0);
+  if (error != CL_SUCCESS)
+  {
+    log_error("clGetDeviceIDs failed: %s\n", IGetErrorString(error));
+    result.ResultSub(CResult::TEST_FAIL);
+    return result.Result();
+  }
+  while (deviceWrapper->AdapterNext())
+  {
+    std::vector<cl_dx9_media_adapter_type_khr> mediaAdapterTypes;
+    mediaAdapterTypes.push_back(adapterType);
+    std::vector<void *> mediaDevices;
+    mediaDevices.push_back(deviceWrapper->Device());
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result)))
+    {
+      return result.Result();
+    }
+    //all devices for the adapter; CL_DEVICE_NOT_FOUND is not treated as a failure
+    cl_uint devicesAllNum = 0;
+    error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0],
+      CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesAllNum);
+    if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND)
+    {
+      log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    std::vector<cl_device_id> devicesAll;
+    if (devicesAllNum > 0)
+    {
+      devicesAll.resize(devicesAllNum);
+      error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0],
+        CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, devicesAllNum, &devicesAll[0], 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+    }
+    //preferred devices for the adapter, queried the same way
+    cl_uint devicesPreferredNum = 0;
+    error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0],
+      CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesPreferredNum);
+    if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND)
+    {
+      log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    std::vector<cl_device_id> devicesPreferred;
+    if (devicesPreferredNum > 0)
+    {
+      devicesPreferred.resize(devicesPreferredNum);
+      error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0],
+        CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, devicesPreferredNum, &devicesPreferred[0], 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+    }
+    //a count mismatch is recorded, but the membership checks below still run
+    if (devicesAllNum < devicesPreferredNum)
+    {
+      log_error("Invalid number of preferred devices. It should be a subset of all devices\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    //every preferred device has to appear in the all-devices list
+    for (cl_uint i = 0; i < devicesPreferredNum; ++i)
+    {
+      cl_uint j = 0;
+      for (; j < devicesAllNum; ++j)
+      {
+        if (devicesPreferred[i] == devicesAll[j])
+          break;
+      }
+      if (j == devicesAllNum)
+      {
+        log_error("Preferred device is not a subset of all devices\n");
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+    //every adapter device has to appear in the platform-wide list
+    for (cl_uint i = 0; i < devicesAllNum; ++i)
+    {
+      cl_uint j = 0;
+      for (; j < devicesExpectedNum; ++j)
+      {
+        if (devicesAll[i] == devicesExpected[j])
+          break;
+      }
+      if (j == devicesExpectedNum)
+      {
+        log_error("CL_ALL_DEVICES_FOR_MEDIA_ADAPTER_KHR should be a subset of all devices for selected platform\n");
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+  }
+  //wrapper status is evaluated once the adapter loop has ended
+  if (deviceWrapper->Status() != DEVICE_PASS)
+  {
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+    {
+      log_error("%s init failed\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
+  }
+  return result.Result();
+}
+// Test entry point: runs get_device_ids() for the D3D9, D3D9EX and DXVA adapter types.
+// A non-zero return from get_device_ids() is logged and recorded as TEST_FAIL.
+// The adapter cases are compiled only when _WIN32 is defined.
+// Returns result.Result().
+int test_get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+  CResult result;
+#if defined(_WIN32)
+  if(get_device_ids(deviceID, context, queue, num_elements, CL_ADAPTER_D3D9_KHR) != 0)
+  {
+    log_error("\nTest case (D3D9) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(get_device_ids(deviceID, context, queue, num_elements, CL_ADAPTER_D3D9EX_KHR) != 0)
+  {
+    log_error("\nTest case (D3D9EX) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(get_device_ids(deviceID, context, queue, num_elements, CL_ADAPTER_DXVA_KHR) != 0)
+  {
+    log_error("\nTest case (DXVA) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+#endif
+  return result.Result();
+}
diff --git a/test_extensions/media_sharing/test_interop_sync.cpp b/test_extensions/media_sharing/test_interop_sync.cpp
new file mode 100644
index 0000000..6831a14
--- /dev/null
+++ b/test_extensions/media_sharing/test_interop_sync.cpp
@@ -0,0 +1,357 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "utils.h"
+// Runs one interop case: creates a media surface for adapterType, shares it with OpenCL
+// through clCreateFromDX9MediaSurfaceKHR, reads every plane back and compares both the
+// OpenCL copy and the surface itself against the generated input frame.
+//   width, height  - surface size; the frame buffers hold width * height * 3 / 2 bytes
+//   functionCreate - selects the context-creation call
+//   surfaceFormat  - YUV format; formats other than NV12 are skipped when SurfaceFormatCheck fails
+//   sharedHandle   - whether the surface is created with a shared handle
+//   userSync       - when CL_TRUE, a D3D event query is issued and polled before the acquire
+//                    (Windows only) and clFinish() is called after the release
+// deviceID, context, queue and num_elements are not read by this function.
+// Returns result.Result(); the caller in this file treats a non-zero value as failure.
+int interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queue queue,
+                   int num_elements, unsigned int width, unsigned int height,
+                   TContextFuncType functionCreate, cl_dx9_media_adapter_type_khr adapterType,
+                   TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle, cl_bool userSync)
+{
+  CResult result;
+  //create device
+  std::auto_ptr<CDeviceWrapper> deviceWrapper;
+  if (!DeviceCreate(adapterType, deviceWrapper))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+  //generate input data
+  std::vector<cl_uchar> bufferIn(width * height * 3 / 2, 0);
+  if(!YUVGenerate(surfaceFormat, bufferIn, width, height, 0, 255))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+  while (deviceWrapper->AdapterNext())
+  {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+    //unsupported non-NV12 format: log and return without recording a failure
+    if (surfaceFormat != SURFACE_FORMAT_NV12 &&
+      !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
+    {
+      std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
+      std::string syncStr = (userSync == CL_TRUE) ? "yes": "no";
+      std::string formatStr;
+      std::string adapterStr;
+      SurfaceFormatToString(surfaceFormat, formatStr);
+      AdapterToString(adapterType, adapterStr);
+      log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s, user sync: %s)\n",
+        adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str(), syncStr.c_str());
+      return result.Result();
+    }
+    void *objectSharedHandle = 0;
+    std::auto_ptr<CSurfaceWrapper> surface;
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSharedHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+    // NOTE(review): userSync is not passed into these properties (e.g. via
+    // CL_CONTEXT_INTEROP_USER_SYNC) -- confirm whether that is intended.
+    cl_context_properties contextProperties[] = {
+      CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected,
+      AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(),
+      0,
+    };
+    clContextWrapper ctx;
+    // NOTE(review): label-to-call mapping below is inferred from the enum and function
+    // names -- confirm against the TContextFuncType definition.
+    switch(functionCreate)
+    {
+    case CONTEXT_CREATE_DEFAULT:
+      ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
+      break;
+    case CONTEXT_CREATE_FROM_TYPE:
+      ctx = clCreateContextFromType(&contextProperties[0], gDeviceTypeSelected, NULL, NULL, &error);
+      break;
+    default:
+      log_error("Unknown context creation function enum\n");
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+      break;
+    }
+    if (error != CL_SUCCESS)
+    {
+      std::string functionName;
+      FunctionContextCreateToString(functionCreate, functionName);
+      log_error("%s failed: %s\n", functionName.c_str(), IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    if (!YUVSurfaceSet(surfaceFormat, surface, bufferIn, width, height))
+    {
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+#if defined(_WIN32)
+    cl_dx9_surface_info_khr surfaceInfo;
+    surfaceInfo.resource = *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
+    surfaceInfo.shared_handle = objectSharedHandle;
+#else
+    //placeholder so that &surfaceInfo below still names an object on non-Windows builds
+    void *surfaceInfo = 0;
+#endif
+    //one OpenCL image per plane; memObjList holds the raw handles for the enqueue calls
+    std::vector<cl_mem> memObjList;
+    unsigned int planesNum = PlanesNum(surfaceFormat);
+    std::vector<clMemWrapper> planesList(planesNum);
+    for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
+    {
+      planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      memObjList.push_back(planesList[planeIdx]);
+    }
+    clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error );
+    if (error != CL_SUCCESS)
+    {
+      log_error("Unable to create command queue: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    if (!ImageInfoVerify(adapterType, memObjList, width, height, surface, objectSharedHandle))
+    {
+      log_error("Image info verification failed\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    if (userSync == CL_TRUE)
+    {
+ #if defined(_WIN32)
+      //issue a D3D event query and poll it until GetData stops returning S_FALSE
+      IDirect3DQuery9* eventQuery = NULL;
+      switch (adapterType)
+      {
+      case CL_ADAPTER_D3D9_KHR:
+        {
+          LPDIRECT3DDEVICE9 device = (LPDIRECT3DDEVICE9)deviceWrapper->Device();
+          device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery);
+          eventQuery->Issue(D3DISSUE_END);
+          while (S_FALSE == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH))
+            ;
+        }
+        break;
+      case CL_ADAPTER_D3D9EX_KHR:
+        {
+          LPDIRECT3DDEVICE9EX device = (LPDIRECT3DDEVICE9EX)deviceWrapper->Device();
+          device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery);
+          eventQuery->Issue(D3DISSUE_END);
+          while (S_FALSE == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH))
+            ;
+        }
+        break;
+      case CL_ADAPTER_DXVA_KHR:
+        {
+          CDXVAWrapper *DXVADevice = dynamic_cast<CDXVAWrapper *>(&(*deviceWrapper));
+          LPDIRECT3DDEVICE9EX device = (LPDIRECT3DDEVICE9EX)(DXVADevice->D3D9()).Device();
+          device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery);
+          eventQuery->Issue(D3DISSUE_END);
+          while (S_FALSE == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH))
+            ;
+        }
+        break;
+      default:
+        //was "return false", i.e. 0, which the caller's "!= 0" check reads as success
+        log_error("Unknown adapter type\n");
+        result.ResultSub(CResult::TEST_ERROR);
+        return result.Result();
+      }
+      if(eventQuery)
+      {
+          eventQuery->Release();
+      }
+ #endif
+    }
+    error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    //read the planes back to back into one frame buffer; planes after the first are half size in each dimension
+    size_t origin[3] = {0,0,0};
+    size_t offset = 0;
+    size_t frameSize = width * height * 3 / 2;
+    std::vector<cl_uchar> out( frameSize, 0 );
+    for (size_t i = 0; i < memObjList.size(); ++i)
+    {
+      size_t planeWidth = (i == 0) ? width: width / 2;
+      size_t planeHeight = (i == 0) ? height: height / 2;
+      size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+      error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+      offset += planeWidth * planeHeight;
+    }
+    if (!YUVCompare(surfaceFormat, out, bufferIn, width, height))
+    {
+      log_error("OCL object verification failed - clEnqueueReadImage\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    if (userSync == CL_TRUE)
+    {
+      error = clFinish(cmdQueue);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clFinish failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+    //shared object verification
+    std::vector<cl_uchar> bufferOut(frameSize, 0);
+    if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height))
+    {
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    if (!YUVCompare(surfaceFormat, bufferOut, bufferIn, width, height))
+    {
+      log_error("Media surface is different than expected\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+  }
+  //wrapper status is evaluated once the adapter loop has ended
+  if (deviceWrapper->Status() != DEVICE_PASS)
+  {
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+    {
+      log_error("%s init failed\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
+  }
+  return result.Result();
+}
+// Test entry point: runs interop_user_sync() on a 256x256 surface for every combination of
+// adapter type, context-creation function, YUV format, shared-handle mode and user-sync flag.
+// D3D9 adapters are skipped when a shared handle is requested.
+// The adapter list is filled only when _WIN32 is defined, so elsewhere no case is executed.
+// Each failing combination is logged and recorded as TEST_FAIL; returns result.Result().
+int test_interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+  const unsigned int WIDTH = 256;
+  const unsigned int HEIGHT = 256;
+  std::vector<cl_dx9_media_adapter_type_khr> adapters;
+#if defined(_WIN32)
+  adapters.push_back(CL_ADAPTER_D3D9_KHR);
+  adapters.push_back(CL_ADAPTER_D3D9EX_KHR);
+  adapters.push_back(CL_ADAPTER_DXVA_KHR);
+#endif
+  std::vector<TContextFuncType> contextFuncs;
+  contextFuncs.push_back(CONTEXT_CREATE_DEFAULT);
+  contextFuncs.push_back(CONTEXT_CREATE_FROM_TYPE);
+  std::vector<TSurfaceFormat> formats;
+  formats.push_back(SURFACE_FORMAT_NV12);
+  formats.push_back(SURFACE_FORMAT_YV12);
+  std::vector<TSharedHandleType> sharedHandleTypes;
+  sharedHandleTypes.push_back(SHARED_HANDLE_DISABLED);
+  sharedHandleTypes.push_back(SHARED_HANDLE_ENABLED);
+  std::vector<cl_bool> sync;
+  sync.push_back(CL_FALSE);
+  sync.push_back(CL_TRUE);
+  CResult result;
+  for (size_t adapterIdx = 0; adapterIdx < adapters.size(); ++adapterIdx)
+  {
+    //iteration through all create context functions
+    for (size_t contextFuncIdx = 0; contextFuncIdx < contextFuncs.size(); ++contextFuncIdx)
+    {
+      //iteration through YUV formats
+      for (size_t formatIdx = 0; formatIdx < formats.size(); ++formatIdx)
+      {
+        //shared handle enabled or disabled
+        for (size_t sharedHandleIdx = 0; sharedHandleIdx < sharedHandleTypes.size(); ++sharedHandleIdx)
+        {
+          //user sync interop disabled or enabled
+          for (size_t syncIdx = 0; syncIdx < sync.size(); ++syncIdx)
+          {
+            //the D3D9 + shared handle combination is not run
+            if (adapters[adapterIdx] == CL_ADAPTER_D3D9_KHR && sharedHandleTypes[sharedHandleIdx] == SHARED_HANDLE_ENABLED)
+              continue;
+            if(interop_user_sync(deviceID, context, queue, num_elements, WIDTH, HEIGHT,
+              contextFuncs[contextFuncIdx], adapters[adapterIdx], formats[formatIdx],
+              sharedHandleTypes[sharedHandleIdx], sync[syncIdx]) != 0)
+            {
+              std::string syncStr = (sync[syncIdx] == CL_TRUE) ? "user sync enabled": "user sync disabled";
+              std::string sharedHandle = (sharedHandleTypes[sharedHandleIdx] == SHARED_HANDLE_ENABLED)? "shared handle": "no shared handle";
+              std::string adapterStr;
+              std::string formatStr;
+              std::string functionStr;
+              SurfaceFormatToString(formats[formatIdx], formatStr);
+              AdapterToString(adapters[adapterIdx], adapterStr);
+              //name the context-creation function that was actually used for this case
+              FunctionContextCreateToString(contextFuncs[contextFuncIdx], functionStr);
+              log_error("\nTest case - %s (%s, %s, %s, %s) failed\n\n", functionStr.c_str(), adapterStr.c_str(), formatStr.c_str(), sharedHandle.c_str(), syncStr.c_str());
+              result.ResultSub(CResult::TEST_FAIL);
+            }
+          }
+        }
+      }
+    }
+  }
+  return result.Result();
+}
diff --git a/test_extensions/media_sharing/test_memory_access.cpp b/test_extensions/media_sharing/test_memory_access.cpp
new file mode 100644
index 0000000..5aabaf6
--- /dev/null
+++ b/test_extensions/media_sharing/test_memory_access.cpp
@@ -0,0 +1,468 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "utils.h"
+// Exercises the three access modes of OpenCL images created from a DX9 media surface.
+// The surface is first filled with bufferRef0, then for every adapter of adapterType:
+//   1. CL_MEM_WRITE_ONLY planes are written with bufferRef1 and the surface is compared to it;
+//   2. CL_MEM_READ_ONLY planes are read back and compared to bufferRef1, and the surface is
+//      compared to bufferRef1 again;
+//   3. CL_MEM_READ_WRITE planes are read (expecting bufferRef1), overwritten with bufferRef2,
+//      and the surface is compared to bufferRef2.
+// width and height give the surface size; each frame buffer holds width * height * 3 / 2 bytes.
+// deviceID, context, queue and num_elements are not read by this function.
+// Returns result.Result() after recording TEST_ERROR / TEST_FAIL / TEST_NOTSUPPORTED sub-results.
+int memory_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements,
+                  unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType,
+                  TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle)
+{
+  CResult result;
+  std::auto_ptr<CDeviceWrapper> deviceWrapper;
+  //creates device
+  if (!DeviceCreate(adapterType, deviceWrapper))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+  //generate input and expected data
+  size_t frameSize = width * height * 3 / 2;
+  std::vector<cl_uchar> bufferRef0(frameSize, 0);
+  std::vector<cl_uchar> bufferRef1(frameSize, 0);
+  std::vector<cl_uchar> bufferRef2(frameSize, 0);
+  if (!YUVGenerate(surfaceFormat, bufferRef0, width, height, 0, 90) ||
+    !YUVGenerate(surfaceFormat, bufferRef1, width, height, 91, 180) ||
+    !YUVGenerate(surfaceFormat, bufferRef2, width, height, 181, 255))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+  //iterates through all devices
+  while (deviceWrapper->AdapterNext())
+  {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+    //unsupported non-NV12 format: log and return without recording a failure
+    if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
+    {
+      std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
+      std::string formatStr;
+      std::string adapterStr;
+      SurfaceFormatToString(surfaceFormat, formatStr);
+      AdapterToString(adapterType, adapterStr);
+      log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n",
+        adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
+      return result.Result();
+    }
+    void *objectSharedHandle = 0;
+    std::auto_ptr<CSurfaceWrapper> surface;
+    //creates surface
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSharedHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+    //initial surface content
+    if (!YUVSurfaceSet(surfaceFormat, surface, bufferRef0, width, height))
+    {
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+    cl_context_properties contextProperties[] = {
+      CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected,
+      AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(),
+      0,
+    };
+    clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clCreateContext failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error );
+    if (error != CL_SUCCESS)
+    {
+      log_error("Unable to create command queue: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    { //memory access write
+#if defined(_WIN32)
+      cl_dx9_surface_info_khr surfaceInfo;
+      surfaceInfo.resource = *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
+      surfaceInfo.shared_handle = objectSharedHandle;
+#else
+      //placeholder so that &surfaceInfo below still names an object on non-Windows builds
+      void *surfaceInfo = 0;
+#endif
+      std::vector<cl_mem> memObjList;
+      unsigned int planesNum = PlanesNum(surfaceFormat);
+      std::vector<clMemWrapper> planesList(planesNum);
+      for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
+      {
+        planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_WRITE_ONLY, adapterType, &surfaceInfo, planeIdx, &error);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clCreateFromDX9MediaSurfaceKHR failed for WRITE_ONLY plane %i: %s\n", planeIdx, IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+          return result.Result();
+        }
+        memObjList.push_back(planesList[planeIdx]);
+      }
+      error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      //planes are taken back to back from bufferRef1; planes after the first are half size in each dimension
+      size_t offset = 0;
+      size_t origin[3] = {0,0,0};
+      for (size_t i = 0; i < memObjList.size(); ++i)
+      {
+        size_t planeWidth = (i == 0) ? width: width / 2;
+        size_t planeHeight = (i == 0) ? height: height / 2;
+        size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+        error = clEnqueueWriteImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane,
+          0, 0, &bufferRef1[offset], 0, 0, 0);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        offset += planeWidth * planeHeight;
+      }
+      error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+    //the surface must now hold what was written through OpenCL
+    std::vector<cl_uchar> bufferOut0(frameSize, 0);
+    if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut0, width, height))
+    {
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    if (!YUVCompare(surfaceFormat, bufferOut0, bufferRef1, width, height))
+    {
+      log_error("Media surface is different than expected\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    { //memory access read
+#if defined(_WIN32)
+      cl_dx9_surface_info_khr surfaceInfo;
+      surfaceInfo.resource = *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
+      surfaceInfo.shared_handle = objectSharedHandle;
+#else
+      //placeholder so that &surfaceInfo below still names an object on non-Windows builds
+      void *surfaceInfo = 0;
+#endif
+      std::vector<cl_mem> memObjList;
+      unsigned int planesNum = PlanesNum(surfaceFormat);
+      std::vector<clMemWrapper> planesList(planesNum);
+      for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
+      {
+        planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_ONLY, adapterType, &surfaceInfo, planeIdx, &error);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clCreateFromDX9MediaSurfaceKHR failed for READ_ONLY plane %i: %s\n", planeIdx, IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+          return result.Result();
+        }
+        memObjList.push_back(planesList[planeIdx]);
+      }
+      error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      std::vector<cl_uchar> out( frameSize, 0 );
+      size_t offset = 0;
+      size_t origin[3] = {0,0,0};
+      for (size_t i = 0; i < memObjList.size(); ++i)
+      {
+        size_t planeWidth = (i == 0) ? width: width / 2;
+        size_t planeHeight = (i == 0) ? height: height / 2;
+        size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+        error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0,
+          &out[offset], 0, 0, 0);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        offset += planeWidth * planeHeight;
+      }
+      if (!YUVCompare(surfaceFormat, out, bufferRef1, width, height))
+      {
+        log_error("OCL image (READ_ONLY) is different than expected\n");
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+      error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+    //the read-only pass is expected to leave the surface at bufferRef1
+    std::vector<cl_uchar> bufferOut1(frameSize, 0);
+    if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut1, width, height))
+    {
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    if (!YUVCompare(surfaceFormat, bufferOut1, bufferRef1, width, height))
+    {
+      log_error("Media surface is different than expected\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    { //memory access read write
+#if defined(_WIN32)
+      cl_dx9_surface_info_khr surfaceInfo;
+      surfaceInfo.resource = *(static_cast<CD3D9SurfaceWrapper *>(surface.get()));
+      surfaceInfo.shared_handle = objectSharedHandle;
+#else
+      //placeholder so that &surfaceInfo below still names an object on non-Windows builds
+      void *surfaceInfo = 0;
+#endif
+      std::vector<cl_mem> memObjList;
+      unsigned int planesNum = PlanesNum(surfaceFormat);
+      std::vector<clMemWrapper> planesList(planesNum);
+      for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx)
+      {
+        planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clCreateFromDX9MediaSurfaceKHR failed for READ_WRITE plane %i: %s\n", planeIdx, IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+          return result.Result();
+        }
+        memObjList.push_back(planesList[planeIdx]);
+      }
+      error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      { //read
+        std::vector<cl_uchar> out( frameSize, 0 );
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+          error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0,
+            &out[offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          offset += planeWidth * planeHeight;
+        }
+        if (!YUVCompare(surfaceFormat, out, bufferRef1, width, height))
+        {
+          log_error("OCL image (READ_WRITE) is different than expected\n");
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+      { //write
+        size_t offset = 0;
+        size_t origin[3] = {0,0,0};
+        for (size_t i = 0; i < memObjList.size(); ++i)
+        {
+          size_t planeWidth = (i == 0) ? width: width / 2;
+          size_t planeHeight = (i == 0) ? height: height / 2;
+          size_t regionPlane[3] = {planeWidth, planeHeight, 1};
+          error = clEnqueueWriteImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane,
+            0, 0, &bufferRef2[offset], 0, 0, 0);
+          if (error != CL_SUCCESS)
+          {
+            log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error));
+            result.ResultSub(CResult::TEST_FAIL);
+          }
+          offset += planeWidth * planeHeight;
+        }
+      }
+      error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+    //after the read-write pass the surface must hold bufferRef2
+    std::vector<cl_uchar> bufferOut2(frameSize, 0);
+    if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut2, width, height))
+    {
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    if (!YUVCompare(surfaceFormat, bufferOut2, bufferRef2, width, height))
+    {
+      log_error("Media surface is different than expected\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+  }
+  //wrapper status is evaluated once the adapter loop has ended
+  if (deviceWrapper->Status() != DEVICE_PASS)
+  {
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+    {
+      log_error("%s init failed\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
+  }
+  return result.Result();
+}
+int test_memory_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) // Test entry point: runs memory_access() for each adapter/format/shared-handle case below and returns the combined result.
+  CResult result; // combined status; each case whose check trips records TEST_FAIL through ResultSub()
+#if defined(_WIN32) // NOTE(review): no #else/#endif and no function braces are visible in this excerpt - confirm against the full file
+  // D3D9 adapter cases (no shared handle only)
+  if(memory_access(deviceID, context, queue, num_elements, 256, 256, CL_ADAPTER_D3D9_KHR, // NOTE(review): every call in this function is cut off after the adapter type in this excerpt; the two literals are presumably width and height - TODO confirm
+  {
+    log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(memory_access(deviceID, context, queue, num_elements, 512, 256, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  // D3D9EX adapter cases (NV12 and YV12, each without and with a shared handle)
+  if(memory_access(deviceID, context, queue, num_elements, 256, 512, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(memory_access(deviceID, context, queue, num_elements, 512, 256, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(memory_access(deviceID, context, queue, num_elements, 256, 256, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(memory_access(deviceID, context, queue, num_elements, 128, 128, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  // DXVA adapter cases (NV12 and YV12, each without and with a shared handle)
+  if(memory_access(deviceID, context, queue, num_elements, 128, 128, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(memory_access(deviceID, context, queue, num_elements, 64, 64, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(memory_access(deviceID, context, queue, num_elements, 512, 512, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(memory_access(deviceID, context, queue, num_elements, 1024, 1024, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  return result.Result(); // hand the combined status back to the caller
diff --git a/test_extensions/media_sharing/test_other_data_types.cpp b/test_extensions/media_sharing/test_other_data_types.cpp
new file mode 100644
index 0000000..3a66725
--- /dev/null
+++ b/test_extensions/media_sharing/test_other_data_types.cpp
@@ -0,0 +1,1022 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//    http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <stdio.h>
+#include <stdlib.h>
+#include "harness/errorHelpers.h"
+#include "harness/kernelHelpers.h"
+#include "utils.h"
+template<typename T> // T is the element type of the host-side frame buffers (std::vector<T>), one T per channel sample
+int other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, // Checks read, write, kernel and map access on OpenCL images created from DX9 media surfaces of one format; these four parameters are not referenced in the visible body
+                    unsigned int iterationNum, unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType, // iterationNum: frames processed per adapter; width/height: surface and image region size
+                    TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle) // sharedHandle selects shared-handle surface creation; the function returns result.Result()
+  const unsigned int FRAME_NUM = 2; // number of distinct generated frames; iterations cycle through them via frameIdx % FRAME_NUM
+  const float MAX_VALUE = 0.6f; // divided by FRAME_NUM to get the per-frame step passed to DataGenerate
+  const std::string PROGRAM_STR = // kernel source: writes (imageIn - 0.2f) to imageOut and stores the queried image size in imageRes
+    "__kernel void TestFunction( read_only image2d_t imageIn, write_only image2d_t imageOut, "
+    NL "                            sampler_t sampler, __global int *imageRes)"
+    NL "{"
+    NL "  int w = get_global_id(0);"
+    NL "  int h = get_global_id(1);"
+    NL "  int width = get_image_width(imageIn);"
+    NL "  int height = get_image_height(imageOut);" // note: height is queried from imageOut while width comes from imageIn
+    NL "  float4 color0 = read_imagef(imageIn, sampler, (int2)(w,h)) - 0.2f;"
+    NL "  float4 color1 = read_imagef(imageIn, sampler, (float2)(w,h)) - 0.2f;"
+    NL "  color0 = (color0 == color1) ? color0: (float4)(0.5, 0.5, 0.5, 0.5);" // components where the int- and float-coordinate reads disagree become 0.5
+    NL "  write_imagef(imageOut, (int2)(w,h), color0);"
+    NL "  if(w == 0 && h == 0)"
+    NL "  {"
+    NL "    imageRes[0] = width;"
+    NL "    imageRes[1] = height;"
+    NL "  }"
+    NL "}";
+  CResult result; // accumulated status returned on every exit path
+  cl_image_format format;
+  if(!SurfaceFormatToOCL(surfaceFormat, format)) // fills `format` from the surface format; failure is reported as TEST_ERROR
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+  std::auto_ptr<CDeviceWrapper> deviceWrapper;
+  if (!DeviceCreate(adapterType, deviceWrapper))
+  {
+    result.ResultSub(CResult::TEST_ERROR);
+    return result.Result();
+  }
+  while (deviceWrapper->AdapterNext()) // the body below runs once per successful AdapterNext() call
+  {
+    cl_int error;
+    // check whether the test can be run on this adapter; otherwise return the result as left by deviceExistForCLTest
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+    cl_context_properties contextProperties[] = { // platform plus the media adapter device, zero-terminated
+      CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected,
+      AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(),
+      0,
+    };
+    clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); // a dedicated context on gDeviceIDdetected is used instead of the `context` parameter
+    if (error != CL_SUCCESS)
+    {
+      log_error("clCreateContext failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error );
+    if (error != CL_SUCCESS)
+    {
+      log_error("Unable to create command queue: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    if (!SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) // unsupported surface format: log and return the current result without adding a failure
+    {
+      std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
+      std::string formatStr;
+      std::string adapterStr;
+      SurfaceFormatToString(surfaceFormat, formatStr);
+      AdapterToString(adapterType, adapterStr);
+      log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n",
+        adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
+      return result.Result();
+    }
+    if(!ImageFormatCheck(ctx, CL_MEM_OBJECT_IMAGE2D, format)) // same skip path when the 2D image format check on the context fails
+    {
+      std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
+      std::string formatStr;
+      std::string adapterStr;
+      SurfaceFormatToString(surfaceFormat, formatStr);
+      AdapterToString(adapterType, adapterStr);
+      log_info("Skipping test case, image format is not supported by OCL (adapter type: %s, format: %s, shared handle: %s)\n",
+        adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str());
+      return result.Result();
+    }
+    if (format.image_channel_data_type == CL_HALF_FLOAT)
+    {
+      if (DetectFloatToHalfRoundingMode(cmdQueue)) // a non-zero return is treated as failure
+      {
+        log_error("Unable to detect rounding mode\n");
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+    }
+    std::vector<std::vector<T> > bufferIn(FRAME_NUM); // input frames copied into the source surface
+    std::vector<std::vector<T> > bufferExp(FRAME_NUM); // frames written through OpenCL and expected back from the destination surface
+    float step = MAX_VALUE / static_cast<float>(FRAME_NUM);
+    unsigned int planeNum = ChannelNum(surfaceFormat); // used below as the number of T samples per pixel
+    for (size_t i = 0; i < FRAME_NUM; ++i)
+    {
+      DataGenerate(surfaceFormat, format.image_channel_data_type, bufferIn[i], width, height, planeNum, step * i, step * (i + 1)); // presumably generates values in [step*i, step*(i+1)] - TODO confirm
+      DataGenerate(surfaceFormat, format.image_channel_data_type, bufferExp[i], width, height, planeNum, step * i, step * (i + 1), 0.2f); // presumably the same data offset by 0.2f, mirroring the kernel's "- 0.2f" - TODO confirm
+    }
+    void *objectSrcHandle = 0;
+    std::auto_ptr<CSurfaceWrapper> surfaceSrc;
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceSrc,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSrcHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+    void *objectDstHandle = 0;
+    std::auto_ptr<CSurfaceWrapper> surfaceDst;
+    if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceDst,
+      (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectDstHandle))
+    {
+      log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx());
+      result.ResultSub(CResult::TEST_ERROR);
+      return result.Result();
+    }
+#if defined(_WIN32)
+    cl_dx9_surface_info_khr surfaceSrcInfo; // resource + shared handle describing the source surface to OpenCL
+    CD3D9SurfaceWrapper *dx9SurfaceSrc = (static_cast<CD3D9SurfaceWrapper *>(surfaceSrc.get()));
+    surfaceSrcInfo.resource = *dx9SurfaceSrc;
+    surfaceSrcInfo.shared_handle = objectSrcHandle;
+    cl_dx9_surface_info_khr surfaceDstInfo; // same description for the destination surface
+    CD3D9SurfaceWrapper *dx9SurfaceDst = (static_cast<CD3D9SurfaceWrapper *>(surfaceDst.get()));
+    surfaceDstInfo.resource = *dx9SurfaceDst;
+    surfaceDstInfo.shared_handle = objectDstHandle;
+    void *surfaceSrcInfo = 0; // NOTE(review): redeclares the name above; presumably belongs to a non-Windows #else branch whose directive is not visible in this excerpt
+    void *surfaceDstInfo = 0; // NOTE(review): same as the previous line
+    // create the OpenCL shared image objects from the two media surfaces
+    clMemWrapper objectSrcShared = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceSrcInfo, 0, &error);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clCreateFromDX9MediaSurfaceKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    clMemWrapper objectDstShared = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceDstInfo, 0, &error);
+    if (error != CL_SUCCESS)
+    {
+      log_error("clCreateFromDX9MediaSurfaceKHR failed: %s\n", IGetErrorString(error));
+      result.ResultSub(CResult::TEST_FAIL);
+      return result.Result();
+    }
+    std::vector<cl_mem> memObjList; // both shared images are acquired and released together
+    memObjList.push_back(objectSrcShared);
+    memObjList.push_back(objectDstShared);
+    if (!GetMemObjInfo(objectSrcShared, adapterType, surfaceSrc, objectSrcHandle)) // failure is recorded but the test keeps going
+    {
+      log_error("Invalid memory object info\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    if (!GetImageInfo(objectSrcShared, format, sizeof(T) * planeNum, // passes element size sizeof(T)*planeNum and a tightly packed row size; failure is recorded but the test keeps going
+      width * sizeof(T) * planeNum,  0, width, height, 0, 0))
+    {
+      log_error("clGetImageInfo failed\n");
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx) // one pass per frame; frame data is selected with frameIdx % FRAME_NUM
+    {
+      // surface set: copy the input frame into the source surface one row at a time
+#if defined(_WIN32)
+      D3DLOCKED_RECT rect;
+      if (FAILED((*dx9SurfaceSrc)->LockRect(&rect, NULL, 0)))
+      {
+        log_error("Surface lock failed\n");
+        result.ResultSub(CResult::TEST_ERROR);
+        return result.Result();
+      }
+      size_t pitch = rect.Pitch / sizeof(T); // surface row pitch converted from bytes to T elements
+      size_t lineSize = width * planeNum * sizeof(T); // bytes of payload per row
+      T *ptr = static_cast<T *>(rect.pBits);
+      for (size_t y = 0; y < height; ++y)
+        memcpy(ptr + y * pitch, &bufferIn[frameIdx % FRAME_NUM][y * width * planeNum], lineSize);
+      (*dx9SurfaceSrc)->UnlockRect();
+      void *surfaceInfo = 0; // NOTE(review): not referenced in the visible code; presumably from a non-Windows branch
+      error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0);
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueAcquireMediaSurfaceKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+        return result.Result();
+      }
+      size_t origin[3] = {0,0,0};
+      size_t region[3] = {width, height, 1}; // whole image
+      { // read operation: blocking read of the source image must return the frame placed in the surface
+        std::vector<T> out( planeNum * width * height, 0 );
+        error = clEnqueueReadImage(cmdQueue, objectSrcShared, CL_TRUE, origin, region, 0, 0, &out[0], 0, 0, 0);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        if (!DataCompare(surfaceFormat, format.image_channel_data_type, out, bufferIn[frameIdx % FRAME_NUM], width, height, planeNum)) // the comparison runs even when the read above failed
+        {
+          log_error("Frame idx: %i, OCL object is different then expected\n", frameIdx); // NOTE(review): frameIdx is size_t but is printed with %i
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+      { // write operation: overwrite the source image with the expected frame, which becomes the kernel input
+        error = clEnqueueWriteImage(cmdQueue, objectSrcShared, CL_TRUE, origin, region,
+          0, 0, &bufferExp[frameIdx % FRAME_NUM][0], 0, 0, 0);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+      { // kernel operations: run TestFunction from source image to destination image and check the reported dimensions
+        clSamplerWrapper sampler = clCreateSampler( ctx, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error ); // unnormalized coordinates, no addressing mode, nearest filtering
+        if(error != CL_SUCCESS)
+        {
+          log_error("Unable to create sampler\n");
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        size_t threads[ 2 ] = { width, height }; // one work-item per pixel
+        clProgramWrapper program;
+        clKernelWrapper kernel;
+        const char *progPtr = PROGRAM_STR.c_str();
+        if(create_single_kernel_helper(ctx, &program, &kernel, 1, (const char **)&progPtr, "TestFunction")) // a non-zero return is recorded as failure, but execution continues
+          result.ResultSub(CResult::TEST_FAIL);
+        error = clSetKernelArg( kernel, 0, sizeof( objectSrcShared ), &(objectSrcShared) );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to set kernel arguments" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        error = clSetKernelArg( kernel, 1, sizeof( objectDstShared ), &(objectDstShared) );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to set kernel arguments" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        error = clSetKernelArg( kernel, 2, sizeof( sampler ), &sampler );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to set kernel arguments" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        size_t bufferSize = sizeof(cl_int) * 2; // room for the width and height written by the kernel
+        clMemWrapper imageRes = clCreateBuffer( ctx, CL_MEM_READ_WRITE, bufferSize, NULL, &error);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clCreateBuffer failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        error = clSetKernelArg( kernel, 3, sizeof( imageRes ), &imageRes ); // NOTE(review): this status is overwritten by the next assignment without being checked
+        size_t localThreads[ 2 ];
+        error = get_max_common_2D_work_group_size( ctx, kernel, threads, localThreads );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to get work group size to use" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        error = clEnqueueNDRangeKernel( cmdQueue, kernel, 2, NULL, threads, localThreads, 0, NULL, NULL );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to execute test kernel" );
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        std::vector<cl_uint> imageResOut(2, 0); // read back as cl_uint although the kernel stores int values
+        error = clEnqueueReadBuffer( cmdQueue, imageRes, CL_TRUE, 0, bufferSize, &imageResOut[0], 0, NULL, NULL );
+        if (error != CL_SUCCESS)
+        {
+          log_error("Unable to read buffer");
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        if(imageResOut[0] != width)
+        {
+          log_error("Invalid width value, test = %i, expected = %i\n", imageResOut[0], width);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        if(imageResOut[1] != height)
+        {
+          log_error("Invalid height value, test = %i, expected = %i\n", imageResOut[1], height);
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+      { // map operation: verify the kernel output through a mapping, then write the expected frame through the same mapping
+        size_t mapOrigin[3] = {0,0,0};
+        size_t mapRegion[3] = {width, height, 1};
+        std::vector<T> out( width * height * planeNum, 0 );
+        size_t rowPitch = 0; // filled in by clEnqueueMapImage, in bytes
+        size_t slicePitch = 0;
+        void *mapPtr = clEnqueueMapImage(cmdQueue, objectDstShared, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, mapOrigin, mapRegion,
+          &rowPitch, &slicePitch, 0, 0, 0, &error);
+        if (error != CL_SUCCESS) // NOTE(review): on failure mapPtr is still dereferenced by the memcpy loops below
+        {
+          log_error("clEnqueueMapImage failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        for (size_t y = 0; y < height; ++y) // gather the mapped rows (rowPitch apart) into a tightly packed buffer
+          memcpy(&out[y * width * planeNum], static_cast<T *>(mapPtr) + y * rowPitch / sizeof(T),
+          width * planeNum * sizeof(T));
+        if (!DataCompare(surfaceFormat, format.image_channel_data_type, out, bufferIn[frameIdx % FRAME_NUM], width, height, planeNum)) // kernel wrote (source - 0.2f) and the source held bufferExp, so bufferIn is expected - presumably bufferExp == bufferIn + 0.2f; TODO confirm
+        {
+          log_error("Frame idx: %i, Mapped OCL object is different then expected\n", frameIdx); // NOTE(review): frameIdx is size_t but is printed with %i
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+        for (size_t y = 0; y < height; ++y) // scatter the expected frame into the mapping so the destination surface ends up holding bufferExp
+          memcpy(static_cast<T *>(mapPtr) + y * rowPitch / sizeof(T), &bufferExp[frameIdx % FRAME_NUM][y * width * planeNum],
+          width * planeNum * sizeof(T));
+        error = clEnqueueUnmapMemObject(cmdQueue, objectDstShared, mapPtr, 0, 0, 0);
+        if (error != CL_SUCCESS)
+        {
+          log_error("clEnqueueUnmapMemObject failed: %s\n", IGetErrorString(error));
+          result.ResultSub(CResult::TEST_FAIL);
+        }
+      }
+      error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast<cl_uint>(memObjList.size()), &memObjList[0], 0, 0, 0); // release both images before the destination surface is locked and read below
+      if (error != CL_SUCCESS)
+      {
+        log_error("clEnqueueReleaseMediaSurfaceKHR failed: %s\n", IGetErrorString(error));
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+      std::vector<T> out(width * height * planeNum, 0);
+      // surface get: read the destination surface back row by row and compare it with the expected frame
+#if defined(_WIN32)
+      if (FAILED((*dx9SurfaceDst)->LockRect(&rect, NULL, 0)))
+      {
+        log_error("Surface lock failed\n");
+        result.ResultSub(CResult::TEST_ERROR);
+        return result.Result();
+      }
+      pitch = rect.Pitch / sizeof(T);
+      lineSize = width * planeNum * sizeof(T);
+      ptr = static_cast<T *>(rect.pBits);
+      for (size_t y = 0; y < height; ++y)
+        memcpy(&out[y * width * planeNum], ptr + y * pitch, lineSize);
+      (*dx9SurfaceDst)->UnlockRect();
+      if (!DataCompare(surfaceFormat, format.image_channel_data_type, out, bufferExp[frameIdx % FRAME_NUM], width, height, planeNum))
+      {
+        log_error("Frame idx: %i, media object is different then expected\n", frameIdx); // NOTE(review): frameIdx is size_t but is printed with %i
+        result.ResultSub(CResult::TEST_FAIL);
+      }
+    }
+  }
+  if (deviceWrapper->Status() != DEVICE_PASS) // after the adapter loop: fold the device wrapper status into the result
+  {
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+  {
+      log_error("%s init failed\n", adapterName.c_str());
+    result.ResultSub(CResult::TEST_FAIL);
+    }
+    else // any status other than DEVICE_PASS or DEVICE_FAIL is reported as not supported
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
+  }
+  return result.Result();
+int test_other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+  CResult result;
+#if defined(_WIN32)
+  //D3D9
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, L16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, A8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, L8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, G32R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, G16R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, G16R16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, A8L8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, A32B32G32R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, A16B16G16R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, A16B16G16R16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, A8B8G8R8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, X8B8G8R8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, A8R8G8B8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9_KHR,
+  {
+    log_error("\nTest case (D3D9, X8R8G8B8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  //D3D9EX
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, R32F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, R16F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, L16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, L16, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, A8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, A8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, L8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, L8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, G32R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, G32R32F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, G16R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, G16R16F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, G16R16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, G16R16, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, A8L8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, A8L8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, A32B32G32R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, A32B32G32R32F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, A16B16G16R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, A16B16G16R16F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, A16B16G16R16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, A16B16G16R16, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, A8B8G8R8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, A8B8G8R8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, X8B8G8R8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, X8B8G8R8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, A8R8G8B8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, A8R8G8B8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, X8R8G8B8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR,
+  {
+    log_error("\nTest case (D3D9EX, X8R8G8B8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  //DXVA
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, R32F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, R16F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, L16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, L16, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, A8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, A8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, L8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, L8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, G32R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, G32R32F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, G16R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, G16R16F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, G16R16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, G16R16, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, A8L8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, A8L8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, A32B32G32R32F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_float>(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, A32B32G32R32F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, A16B16G16R16F, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_half>(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, A16B16G16R16F, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, A16B16G16R16, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_ushort>(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, A16B16G16R16, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, A8B8G8R8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, A8B8G8R8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, X8B8G8R8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, X8B8G8R8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, A8R8G8B8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, A8R8G8B8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, X8R8G8B8, no shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  if(other_data_types<cl_uchar>(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR,
+  {
+    log_error("\nTest case (DXVA, X8R8G8B8, shared handle) failed\n\n");
+    result.ResultSub(CResult::TEST_FAIL);
+  }
+  return result.Result();
diff --git a/test_extensions/media_sharing/utils.cpp b/test_extensions/media_sharing/utils.cpp
new file mode 100644
index 0000000..f1f0f54
--- /dev/null
+++ b/test_extensions/media_sharing/utils.cpp
@@ -0,0 +1,1672 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "utils.h"
+#include "harness/errorHelpers.h"
+#include "harness/rounding_mode.h"
+#include <math.h>
+static RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode; // File-local rounding-mode setting, initialised to kDefaultRoundingMode; presumably consumed by the float-to-half conversion (its use is not visible here).
+_result(TEST_PASS), _resultLast(TEST_NORESULT) // NOTE(review): looks like the member-initializer list of a CResult constructor whose header line is missing from this copy; the overall result starts at TEST_PASS and the last result at TEST_NORESULT.
+CResult::TTestResult CResult::ResultLast() const // Accessor for the outcome most recently stored by ResultSub().
+  return _resultLast;
+int CResult::Result() const // Converts the accumulated _result into an integer code: 0, 1, 2 or -1.
+  switch (_result)
+  {
+  case TEST_PASS:
+    return 0; // pass
+    break; // never reached: follows a return (same for the breaks below)
+  case TEST_FAIL:
+    return 1; // failure
+    break;
+  case TEST_ERROR:
+    return 2; // error
+    break;
+  default:
+    return -1; // any other stored value
+    break;
+  }
+void CResult::ResultSub( TTestResult result ) // Records the outcome of one sub-test and folds it into the overall result.
+  _resultLast = result; // always remember the most recent outcome
+  if (static_cast<int>(result) > static_cast<int>(_result)) // keep the numerically largest outcome seen so far; presumably the enum is ordered by severity
+    _result = result;
+void FunctionContextCreateToString(TContextFuncType contextCreateFunction, std::string &contextFunction) // Writes a printable name for contextCreateFunction into contextFunction.
+  switch(contextCreateFunction)
+  {
+    contextFunction = "CreateContext"; // NOTE(review): the case label for this branch is missing in this copy of the file
+    break;
+    contextFunction = "CreateContextFromType"; // NOTE(review): case label missing here as well
+    break;
+  default:
+    contextFunction = "Unknown"; // unrecognised enum value: still produce a string, then log
+    log_error("FunctionContextCreateToString(): Unknown create function enum!");
+    break;
+  }
+void AdapterToString(cl_dx9_media_adapter_type_khr adapterType, std::string &adapter) // Writes a printable name for adapterType into adapter.
+  switch(adapterType)
+  {
+  case CL_ADAPTER_D3D9_KHR:
+    adapter = "D3D9";
+    break;
+    adapter = "D3D9EX"; // NOTE(review): the case label for this branch is missing in this copy of the file
+    break;
+    adapter = "DXVA"; // NOTE(review): case label missing here as well
+    break;
+  default:
+    adapter = "Unknown"; // unrecognised adapter type: still produce a string, then log
+    log_error("AdapterToString(): Unknown adapter type!");
+    break;
+  }
+cl_context_info AdapterTypeToContextInfo( cl_dx9_media_adapter_type_khr adapterType ) // Maps an adapter type to a cl_context_info value; an unrecognised type is logged and yields 0.
+  switch (adapterType)
+  {
+  case CL_ADAPTER_D3D9_KHR:
+    break; // NOTE(review): the value returned for this case, and the labels/returns of the next two branches, are missing in this copy of the file
+    break;
+    break;
+  default:
+    log_error("AdapterTypeToContextInfo(): Unknown adapter type!");
+    return 0; // unknown adapter type is reported as 0
+    break;
+  }
+void YUVGenerateNV12( std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height, // Fills yuv with a synthetic gradient image: a full-size plane followed by a half-height plane of interleaved byte pairs.
+                     cl_uchar valueMin, cl_uchar valueMax, double valueAdd ) // valueMin/valueMax bound the gradient; valueAdd is a fractional offset, scaled by 255 below
+  yuv.clear();
+  yuv.resize(width * height * 3 / 2, 0); // width*height bytes for the first plane plus half as many for the second, zero-filled
+  double min = static_cast<double>(valueMin);
+  double max = static_cast<double>(valueMax);
+  double range = 255;
+  double add = static_cast<double>(valueAdd * range); // offset expressed in byte units
+  double stepX = (max - min) / static_cast<double>(width); // per-column increment
+  double stepY = (max - min) /static_cast<double>(height); // per-row increment
+  //generate Y plane
+  for (unsigned int i = 0; i < height; ++i)
+  {
+    unsigned int offset = i * width; // start of row i in the first plane
+    double valueYPlane0 = static_cast<double>(stepY * i);
+    for (unsigned int j = 0; j < width; ++j)
+    {
+      double valueXPlane0 = static_cast<double>(stepX * j);
+ + j) = static_cast<cl_uchar>(min + valueXPlane0 / 2 + valueYPlane0 / 2 + add); // NOTE(review): the store target on this line is truncated in this copy; presumably element offset + j of yuv
+    }
+  }
+  //generate UV planes
+  for (unsigned int i = 0; i < height / 2; ++i)
+  {
+    unsigned int offset = width * height + i * width; // start of row i in the second plane, which follows the first
+    double valueYPlane1 = static_cast<double>(stepY * i);
+    double valueYPlane2 = static_cast<double>(stepY * (height / 2 + i));
+    for (unsigned int j = 0; j < width / 2; ++j)
+    {
+      double valueXPlane1 = static_cast<double>(stepX * j);
+      double valueXPlane2 = static_cast<double>(stepX * (width / 2 + j));
+ + j * 2) = static_cast<cl_uchar>(min + valueXPlane1 / 2 + valueYPlane1 / 2 + add); // even byte of the pair (store target truncated in this copy)
+ + j * 2 + 1) = static_cast<cl_uchar>(min + valueXPlane2 / 2 + valueYPlane2 / 2 + add); // odd byte of the pair (store target truncated in this copy)
+    }
+  }
+void YUVGenerateYV12( std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height, cl_uchar valueMin, cl_uchar valueMax, double valueAdd /*= 0.0*/ ) // Fills yuv with a synthetic gradient image laid out as three consecutive planes: one full-size, then two of (width/2) x (height/2).
+  yuv.clear();
+  yuv.resize(width * height * 3 / 2, 0); // full plane plus two quarter-size planes, zero-filled
+  double min = static_cast<double>(valueMin);
+  double max = static_cast<double>(valueMax);
+  double range = 255;
+  double add = static_cast<double>(valueAdd * range); // fractional offset expressed in byte units
+  double stepX = (max - min) / static_cast<double>(width); // per-column increment
+  double stepY = (max - min) /static_cast<double>(height); // per-row increment
+  unsigned offset = 0; // running start index of the plane currently being written
+  //generate Y plane
+  for (unsigned int i = 0; i < height; ++i)
+  {
+    unsigned int plane0Offset = offset + i * width;
+    double valueYPlane0 = static_cast<double>(stepY * i);
+    for (unsigned int j = 0; j < width; ++j)
+    {
+      double valueXPlane0 = static_cast<double>(stepX * j);
+ + j) = static_cast<cl_uchar>(min + valueXPlane0 / 2 + valueYPlane0 / 2 + add); // NOTE(review): the store target on this line is truncated in this copy; presumably element plane0Offset + j of yuv
+    }
+  }
+  //generate V plane
+  offset += width * height; // skip past the full-size plane
+  for (unsigned int i = 0; i < height / 2; ++i)
+  {
+    unsigned int plane1Offset = offset + i * width / 2; // rows of this plane are width/2 bytes long
+    double valueYPlane1 = static_cast<double>(stepY * i);
+    for (unsigned int j = 0; j < width / 2; ++j)
+    {
+      double valueXPlane1 = static_cast<double>(stepX * j);
+ + j) = static_cast<cl_uchar>(min + valueXPlane1 / 2 + valueYPlane1 / 2 + add); // store target truncated in this copy
+    }
+  }
+  //generate U plane
+  offset += width * height / 4; // skip past the first quarter-size plane
+  for (unsigned int i = 0; i < height / 2; ++i)
+  {
+    unsigned int plane2Offset = offset + i * width / 2;
+    double valueYPlane2 = static_cast<double>(stepY * (height / 2 + i)); // this plane uses the lower half of the vertical gradient
+    for (unsigned int j = 0; j < width / 2; ++j)
+    {
+      double valueXPlane2 = static_cast<double>(stepX * j);
+ + j) = static_cast<cl_uchar>(min + valueXPlane2 / 2 + valueYPlane2 / 2 + add); // store target truncated in this copy
+    }
+  }
+bool YUVGenerate( TSurfaceFormat surfaceFormat, std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height, cl_uchar valueMin, cl_uchar valueMax, double valueAdd /*= 0.0*/ ) // Dispatches to the NV12 or YV12 generator by surfaceFormat; returns false (after logging) for any other format.
+  switch (surfaceFormat)
+  {
+    YUVGenerateNV12(yuv, width, height, valueMin, valueMax, valueAdd); // NOTE(review): the case label for this branch is missing in this copy of the file
+    break;
+    YUVGenerateYV12(yuv, width, height, valueMin, valueMax, valueAdd); // NOTE(review): case label missing here as well
+    break;
+  default:
+    log_error("YUVGenerate(): Invalid surface type\n");
+    return false;
+    break;
+  }
+  return true;
+bool YUVSurfaceSetNV12( std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv, // Copies an NV12 image from yuv into the given surface row by row; returns false if the surface cannot be locked.
+                       unsigned int width, unsigned int height ) // NOTE(review): std::auto_ptr is deprecated since C++11 and removed in C++17
+#if defined(_WIN32)
+  CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get()); // unchecked downcast: the caller must pass a D3D9 surface wrapper
+  if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0))) // NOTE(review): the declaration of rect is missing in this copy of the file
+  {
+    log_error("YUVSurfaceSetNV12(): Surface lock failed\n");
+    return false;
+  }
+  size_t pitch = rect.Pitch / sizeof(cl_uchar); // surface row stride in elements; may differ from width
+  size_t lineSize = width * sizeof(cl_uchar); // bytes actually copied per row
+  cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
+  for (size_t y = 0; y < height; ++y) // first plane: height rows
+    memcpy(ptr + y * pitch, & * width), lineSize); // NOTE(review): the source expression is truncated in this copy
+  for (size_t y = 0; y < height / 2; ++y) // second plane: height/2 rows placed after height*pitch in the surface
+    memcpy(ptr + height * pitch + y * pitch, & * height + y * width), lineSize); // source expression truncated in this copy
+  (*d3dSurface)->UnlockRect();
+  return true;
+  return false; // NOTE(review): presumably the non-Windows branch; the #else/#endif lines are missing in this copy
+bool YUVSurfaceSetYV12( std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv, // Copies a three-plane YV12 image from yuv into the given surface; returns false if the surface cannot be locked.
+                       unsigned int width, unsigned int height )
+#if defined(_WIN32)
+  CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get()); // unchecked downcast: the caller must pass a D3D9 surface wrapper
+  if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0))) // NOTE(review): the declaration of rect is missing in this copy of the file
+  {
+    log_error("YUVSurfaceSetYV12(): Surface lock failed!\n");
+    return false;
+  }
+  size_t pitch = rect.Pitch / sizeof(cl_uchar); // surface row stride of the full-size plane
+  size_t pitchHalf = pitch / 2; // row stride used for the two half-width planes
+  size_t lineSize = width * sizeof(cl_uchar);
+  size_t lineHalfSize = lineSize / 2;
+  size_t surfaceOffset = 0; // running start of the current plane inside the surface
+  size_t yuvOffset = 0; // running start of the current plane inside yuv
+  cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
+  for (size_t y = 0; y < height; ++y) // full-size plane
+    memcpy(ptr + surfaceOffset + y * pitch, & + y * width), lineSize); // NOTE(review): the source expression is truncated in this copy
+  surfaceOffset += height * pitch;
+  yuvOffset += width * height;
+  for (size_t y = 0; y < height / 2; ++y) // first half-size plane
+    memcpy(ptr + surfaceOffset + y * pitchHalf, & + y * lineHalfSize), lineHalfSize); // source expression truncated in this copy
+  surfaceOffset += pitchHalf * height / 2;
+  yuvOffset += width * height / 4;
+  for (size_t y = 0; y < height / 2; ++y) // second half-size plane
+    memcpy(ptr + surfaceOffset + y * pitchHalf, & + y * lineHalfSize), lineHalfSize); // source expression truncated in this copy
+  (*d3dSurface)->UnlockRect();
+  return true;
+  return false; // NOTE(review): presumably the non-Windows branch; the #else/#endif lines are missing in this copy
+bool YUVSurfaceSet(TSurfaceFormat surfaceFormat, std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height ) // Dispatches to the NV12 or YV12 upload routine by surfaceFormat; returns false if that routine fails or the format is not handled.
+  switch (surfaceFormat)
+  {
+    if(!YUVSurfaceSetNV12(surface, yuv, width, height)) // NOTE(review): the case label for this branch is missing in this copy of the file
+      return false;
+    break;
+    if(!YUVSurfaceSetYV12(surface, yuv, width, height)) // NOTE(review): case label missing here as well
+      return false;
+    break;
+  default:
+    log_error("YUVSurfaceSet(): Invalid surface type!\n");
+    return false;
+    break;
+  }
+  return true;
+bool YUVSurfaceGetNV12( std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv, // Reads an NV12 image back from the given surface into yuv row by row; returns false if the surface cannot be locked.
+                       unsigned int width, unsigned int height ) // yuv is not resized in the visible code, so the caller presumably sizes it beforehand
+#if defined(_WIN32)
+  CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get()); // unchecked downcast: the caller must pass a D3D9 surface wrapper
+  if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0))) // NOTE(review): the declaration of rect is missing in this copy of the file
+  {
+    log_error("YUVSurfaceGetNV12(): Surface lock failed!\n");
+    return false;
+  }
+  size_t pitch = rect.Pitch / sizeof(cl_uchar); // surface row stride in elements; may differ from width
+  size_t lineSize = width * sizeof(cl_uchar); // bytes actually copied per row
+  cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
+  size_t yuvOffset = 0; // running start of the current plane inside yuv
+  size_t surfaceOffset = 0; // running start of the current plane inside the surface
+  for (size_t y = 0; y < height; ++y) // first plane: height rows
+    memcpy(& + y * width), ptr + y * pitch, lineSize); // NOTE(review): the destination expression is truncated in this copy
+  yuvOffset += width * height;
+  surfaceOffset += pitch * height;
+  for (size_t y = 0; y < height / 2; ++y) // second plane: height/2 rows
+    memcpy(& + y * width), ptr + surfaceOffset + y * pitch, lineSize); // destination expression truncated in this copy
+  (*d3dSurface)->UnlockRect();
+  return true;
+  return false; // NOTE(review): presumably the non-Windows branch; the #else/#endif lines are missing in this copy
+bool YUVSurfaceGetYV12( std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height ) // Reads a three-plane YV12 image back from the given surface into yuv; returns false if the surface cannot be locked.
+#if defined(_WIN32)
+  CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get()); // unchecked downcast: the caller must pass a D3D9 surface wrapper
+  if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0))) // NOTE(review): the declaration of rect is missing in this copy of the file
+  {
+    log_error("YUVSurfaceGetYV12(): Surface lock failed!\n");
+    return false;
+  }
+  size_t pitch = rect.Pitch / sizeof(cl_uchar); // surface row stride of the full-size plane
+  size_t pitchHalf = pitch / 2; // row stride used for the two half-width planes
+  size_t lineSize = width * sizeof(cl_uchar);
+  size_t lineHalfSize = lineSize / 2;
+  size_t surfaceOffset = 0; // running start of the current plane inside the surface
+  size_t yuvOffset = 0; // running start of the current plane inside yuv
+  cl_uchar *ptr = static_cast<cl_uchar *>(rect.pBits);
+  for (size_t y = 0; y < height; ++y) // full-size plane
+    memcpy(& + y * width), ptr + surfaceOffset + y * pitch, lineSize); // NOTE(review): the destination expression is truncated in this copy
+  surfaceOffset += pitch * height;
+  yuvOffset += width * height;
+  for (size_t y = 0; y < height / 2; ++y) // first half-size plane
+    memcpy(& + y * lineHalfSize), ptr + surfaceOffset + y * pitchHalf, lineHalfSize); // destination expression truncated in this copy
+  surfaceOffset += pitchHalf * height / 2;
+  yuvOffset += width * height / 4;
+  for (size_t y = 0; y < height / 2; ++y) // second half-size plane
+    memcpy(& + y * lineHalfSize), ptr + surfaceOffset + y * pitchHalf, lineHalfSize); // destination expression truncated in this copy
+  (*d3dSurface)->UnlockRect();
+  return true;
+  return false; // NOTE(review): presumably the non-Windows branch; the #else/#endif lines are missing in this copy
+bool YUVSurfaceGet(TSurfaceFormat surfaceFormat, std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv, // Dispatches to the NV12 or YV12 read-back routine by surfaceFormat.
+                   unsigned int width, unsigned int height ) // returns false if the chosen routine fails or the format is not handled
+  switch (surfaceFormat)
+  {
+    if(!YUVSurfaceGetNV12(surface, yuv, width, height)) // NOTE(review): the case label for this branch is missing in this copy of the file
+      return false;
+    break;
+    if(!YUVSurfaceGetYV12(surface, yuv, width, height)) // NOTE(review): case label missing here as well
+      return false;
+    break;
+  default:
+    log_error("YUVSurfaceGet(): Invalid surface type!\n");
+    return false;
+    break;
+  }
+  return true;
+bool YUVCompareNV12( const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef, // Compares two NV12 buffers byte for byte; logs and returns false at the first mismatch, true if none is found.
+                    unsigned int width, unsigned int height ) // both buffers are indexed up to width*height*3/2 without a size check
+  //plane 0 verification
+  size_t offset = 0;
+  for (size_t y = 0; y < height; ++y)
+  {
+    size_t plane0Offset = offset + width * y; // start of row y in the first plane
+    for (size_t x = 0; x < width; ++x)
+    {
+      if (yuvTest[plane0Offset + x] != yuvRef[plane0Offset + x])
+      {
+        log_error("Plane 0 (Y) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n", // NOTE(review): x and y are size_t but are printed with %i; confirm this is safe on 64-bit targets
+          yuvRef[plane0Offset + x], yuvTest[plane0Offset + x], x, y);
+        return false;
+      }
+    }
+  }
+  //plane 1 and 2 verification
+  offset += width * height; // the interleaved plane follows the first plane
+  for (size_t y = 0; y < height / 2; ++y)
+  {
+    size_t plane12Offset = offset + width * y;
+    for (size_t x = 0; x < width / 2; ++x)
+    {
+      if ( + 2 * x) != + 2 * x)) // even byte of the pair; NOTE(review): both operands are truncated in this copy of the file
+      {
+        log_error("Plane 1 (U) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
+          yuvRef[plane12Offset + 2 * x], yuvTest[plane12Offset + 2 * x], x, y);
+        return false;
+      }
+      if ( + 2 * x + 1) != + 2 * x + 1)) // odd byte of the pair; operands truncated in this copy
+      {
+        log_error("Plane 2 (V) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
+          yuvRef[plane12Offset + 2 * x + 1], yuvTest[plane12Offset + 2 * x + 1], x, y);
+        return false;
+      }
+    }
+  }
+  return true;
+bool YUVCompareYV12( const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef, // Compares two three-plane YV12 buffers byte for byte; logs and returns false at the first mismatch, true if none is found.
+                    unsigned int width, unsigned int height )
+  //plane 0 verification
+  size_t offset = 0; // running start index of the plane being checked
+  for (size_t y = 0; y < height; ++y)
+  {
+    size_t plane0Offset = width * y;
+    for (size_t x = 0; x < width; ++x)
+    {
+      if ( + x) != + x)) // NOTE(review): both operands are truncated in this copy of the file
+      {
+        log_error("Plane 0 (Y) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n", // NOTE(review): x and y are size_t but are printed with %i; confirm this is safe on 64-bit targets
+          yuvRef[plane0Offset + x], yuvTest[plane0Offset + x], x ,y);
+        return false;
+      }
+    }
+  }
+  //plane 1 verification
+  offset += width * height; // skip past the full-size plane
+  for (size_t y = 0; y < height / 2; ++y)
+  {
+    size_t plane1Offset = offset + width * y / 2; // rows of this plane are width/2 bytes long
+    for (size_t x = 0; x < width / 2; ++x)
+    {
+      if ( + x) != + x)) // operands truncated in this copy
+      {
+        log_error("Plane 1 (V) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
+          yuvRef[plane1Offset + x], yuvTest[plane1Offset + x], x, y);
+        return false;
+      }
+    }
+  }
+  //plane 2 verification
+  offset += width * height / 4; // skip past the first quarter-size plane
+  for (size_t y = 0; y < height / 2; ++y)
+  {
+    size_t plane2Offset = offset + width * y / 2;
+    for (size_t x = 0; x < width / 2; ++x)
+    {
+      if ( + x) != + x)) // operands truncated in this copy
+      {
+        log_error("Plane 2 (U) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n",
+          yuvRef[plane2Offset + x], yuvTest[plane2Offset + x], x, y);
+        return false;
+      }
+    }
+  }
+  return true;
+bool YUVCompare( TSurfaceFormat surfaceFormat, const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef, // Dispatches to the NV12 or YV12 comparison by surfaceFormat and logs a summary line when it fails.
+                unsigned int width, unsigned int height ) // returns false on a mismatch or an unhandled format, true otherwise
+  switch (surfaceFormat)
+  {
+    if (!YUVCompareNV12(yuvTest, yuvRef, width, height)) // NOTE(review): the case label for this branch is missing in this copy of the file
+    {
+      log_error("OCL object is different than expected!\n");
+      return false;
+    }
+    break;
+    if (!YUVCompareYV12(yuvTest, yuvRef, width, height)) // NOTE(review): case label missing here as well
+    {
+      log_error("OCL object is different than expected!\n");
+      return false;
+    }
+    break;
+  default:
+    log_error("YUVCompare(): Invalid surface type!\n");
+    return false;
+    break;
+  }
+  return true;
+void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<float> &data, unsigned int width, unsigned int height, // Float overload: fills data with a gradient image of width x height pixels, channelNum values per pixel.
+                  unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ ) // surfaceFormat and type are not read by this overload
+  data.clear();
+  data.reserve(width * height * channelNum); // values are appended with push_back below
+  double valueMin = static_cast<double>(cmin);
+  double valueMax = static_cast<double>(cmax);
+  double stepX = (valueMax - valueMin) / static_cast<double>(width); // per-column increment
+  double stepY = (valueMax - valueMin) /static_cast<double>(height); // per-row increment
+  double valueAdd = static_cast<double>(add);
+  for (unsigned int i = 0; i < height; ++i)
+  {
+    double valueY = static_cast<double>(stepY * i);
+    for (unsigned int j = 0; j < width; ++j)
+    {
+      double valueX = static_cast<double>(stepX * j);
+      switch (channelNum)
+      {
+      case 1: // single channel: average of the horizontal and vertical gradients
+        data.push_back(static_cast<float>(valueMin + valueX / 2 + valueY / 2 + valueAdd));
+        break;
+      case 2: // two channels: horizontal gradient, then vertical gradient
+        data.push_back(static_cast<float>(valueMin + valueX + valueAdd));
+        data.push_back(static_cast<float>(valueMin + valueY + valueAdd));
+        break;
+      case 4: // four channels: X, Y, X/2, Y/2
+        data.push_back(static_cast<float>(valueMin + valueX + valueAdd));
+        data.push_back(static_cast<float>(valueMin + valueY + valueAdd));
+        data.push_back(static_cast<float>(valueMin + valueX / 2 + valueAdd));
+        data.push_back(static_cast<float>(valueMin + valueY / 2 + valueAdd));
+        break;
+      default: // any other channel count: log and stop, leaving data with whatever was appended so far
+        log_error("DataGenerate(): invalid channel number!");
+        return;
+        break;
+      }
+    }
+  }
+void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<cl_half> &data, unsigned int width, unsigned int height, // 16-bit overload: fills data with a gradient image, encoded as half floats or as 16-bit integers depending on type.
+                  unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ ) // surfaceFormat is not read by this overload
+  data.clear();
+  data.reserve(width * height * channelNum); // values are appended with push_back below
+  double valueMin = static_cast<double>(cmin);
+  double valueMax = static_cast<double>(cmax);
+  double stepX = (valueMax - valueMin) / static_cast<double>(width); // per-column increment
+  double stepY = (valueMax - valueMin) /static_cast<double>(height); // per-row increment
+  switch(type)
+  {
+  case CL_HALF_FLOAT: // each float value goes through convert_float_to_half
+    {
+      double valueAdd = static_cast<double>(add);
+      for (unsigned int i = 0; i < height; ++i)
+      {
+        double valueY = static_cast<double>(stepY * i);
+        for (unsigned int j = 0; j < width; ++j)
+        {
+          double valueX = static_cast<double>(stepX * j);
+          switch (channelNum)
+          {
+          case 1: // average of the horizontal and vertical gradients
+            data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueX / 2 + valueY / 2 + valueAdd)));
+            break;
+          case 2: // X, Y
+            data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueX + valueAdd)));
+            data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueY + valueAdd)));
+            break;
+          case 4: // X, Y, X/2, Y/2
+            data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueX + valueAdd)));
+            data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueY + valueAdd)));
+            data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueX / 2 + valueAdd)));
+            data.push_back(convert_float_to_half(static_cast<float>(valueMin + valueY / 2 + valueAdd)));
+            break;
+          default:
+            log_error("DataGenerate(): invalid channel number!");
+            return;
+            break;
+          }
+        }
+      }
+      break;
+    }
+  case CL_UNORM_INT16: // values are scaled by 65535 and truncated to cl_ushort
+    {
+      double range = 65535;
+      double valueAdd = static_cast<double>(add * range);
+      for (unsigned int i = 0; i < height; ++i)
+      {
+        double valueY = static_cast<double>(stepY * i * range);
+        for (unsigned int j = 0; j < width; ++j)
+        {
+          double valueX = static_cast<double>(stepX * j * range);
+          switch (channelNum)
+          {
+          case 1: // NOTE(review): valueMin is added without the range scaling applied to the other terms; confirm this is intended
+            data.push_back(static_cast<cl_ushort>(valueMin + valueX / 2 + valueY / 2 + valueAdd));
+            break;
+          case 2: // X, Y
+            data.push_back(static_cast<cl_ushort>(valueMin + valueX + valueAdd));
+            data.push_back(static_cast<cl_ushort>(valueMin + valueY + valueAdd));
+            break;
+          case 4: // X, Y, X/2, Y/2
+            data.push_back(static_cast<cl_ushort>(valueMin + valueX + valueAdd));
+            data.push_back(static_cast<cl_ushort>(valueMin + valueY + valueAdd));
+            data.push_back(static_cast<cl_ushort>(valueMin + valueX / 2 + valueAdd));
+            data.push_back(static_cast<cl_ushort>(valueMin + valueY / 2 + valueAdd));
+            break;
+          default:
+            log_error("DataGenerate(): invalid channel number!");
+            return;
+            break;
+          }
+        }
+      }
+    }
+    break;
+  default: // any other channel type: log and leave data empty
+    log_error("DataGenerate(): unknown data type!");
+    return;
+    break;
+  }
+void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<cl_uchar> &data, unsigned int width, unsigned int height, // 8-bit overload: fills data with a gradient image scaled by 255 and truncated to cl_uchar.
+                  unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ ) // type is not read by this overload; surfaceFormat only affects the fourth channel
+  data.clear();
+  data.reserve(width * height * channelNum); // values are appended with push_back below
+  double valueMin = static_cast<double>(cmin);
+  double valueMax = static_cast<double>(cmax);
+  double stepX = (valueMax - valueMin) / static_cast<double>(width); // per-column increment
+  double stepY = (valueMax - valueMin) /static_cast<double>(height); // per-row increment
+  double range = 255;
+  double valueAdd = static_cast<double>(add * range);
+  for (unsigned int i = 0; i < height; ++i)
+  {
+    double valueY = static_cast<double>(stepY * i * range);
+    for (unsigned int j = 0; j < width; ++j)
+    {
+      double valueX = static_cast<double>(stepX * j * range);
+      switch (channelNum)
+      {
+      case 1: // NOTE(review): valueMin is added without the range scaling applied to the other terms; confirm this is intended
+        data.push_back(static_cast<cl_uchar>(valueMin + valueX / 2 + valueY / 2 + valueAdd));
+        break;
+      case 2: // X, Y
+        data.push_back(static_cast<cl_uchar>(valueMin + valueX + valueAdd));
+        data.push_back(static_cast<cl_uchar>(valueMin + valueY + valueAdd));
+        break;
+      case 4: // X, Y, X/2, then either a constant or Y/2
+        data.push_back(static_cast<cl_uchar>(valueMin + valueX + valueAdd));
+        data.push_back(static_cast<cl_uchar>(valueMin + valueY + valueAdd));
+        data.push_back(static_cast<cl_uchar>(valueMin + valueX / 2 + valueAdd));
+        if (surfaceFormat == SURFACE_FORMAT_X8R8G8B8) // for this format the fourth byte is fixed at 0xff instead of carrying gradient data
+          data.push_back(static_cast<cl_uchar>(0xff));
+        else
+          data.push_back(static_cast<cl_uchar>(valueMin + valueY / 2 + valueAdd));
+        break;
+      default: // any other channel count: log and stop, leaving data with whatever was appended so far
+        log_error("DataGenerate(): invalid channel number!");
+        return;
+        break;
+      }
+    }
+  }
+bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<float> &dataTest, const std::vector<float> &dataExp, // Float overload: compares two images value by value with a tolerance; logs and returns false at the first mismatch.
+                 unsigned int width, unsigned int height, unsigned int channelNum) // surfaceFormat and type are not read by this overload
+  float epsilon = 0.000001f; // maximum accepted absolute difference
+  for (unsigned int i = 0; i < height; ++i)
+  {
+    unsigned int offset = i * width * channelNum; // start of row i
+    for (unsigned int j = 0; j < width; ++j)
+    {
+      for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx)
+      {
+        if (abs( + j * channelNum + planeIdx) - + j * channelNum + planeIdx)) > epsilon) // NOTE(review): operands are truncated in this copy; also confirm abs() resolves to a floating-point overload here, since the integer abs() would truncate the difference to 0
+        {
+          log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %f, expected value = %f\n",
+            j, i, planeIdx, dataTest[offset + j * channelNum + planeIdx], dataExp[offset + j * channelNum + planeIdx]);
+          return false;
+        }
+      }
+    }
+  }
+  return true;
+bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<cl_half> &dataTest, const std::vector<cl_half> &dataExp, // 16-bit overload: compares two images value by value, interpreting elements as half floats or 16-bit integers depending on type.
+                 unsigned int width, unsigned int height, unsigned int channelNum) // logs and returns false at the first mismatch or for an unhandled type; surfaceFormat is not read
+  switch(type)
+  {
+  case CL_HALF_FLOAT: // both values are converted to float and compared with a tolerance
+    {
+      float epsilon = 0.001f; // maximum accepted absolute difference after conversion to float
+      for (unsigned int i = 0; i < height; ++i)
+      {
+        unsigned int offset = i * width * channelNum; // start of row i
+        for (unsigned int j = 0; j < width; ++j)
+        {
+          for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx)
+          {
+            float test = convert_half_to_float( + j * channelNum + planeIdx)); // NOTE(review): the argument is truncated in this copy of the file
+            float ref = convert_half_to_float( + j * channelNum + planeIdx)); // argument truncated in this copy
+            if (abs(test - ref) > epsilon) // NOTE(review): confirm abs() resolves to a floating-point overload here; the integer abs() would truncate the difference to 0
+            {
+              log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %f, expected value = %f\n",
+                j, i, planeIdx, test, ref);
+              return false;
+            }
+          }
+        }
+      }
+    }
+    break;
+  case CL_UNORM_INT16: // raw 16-bit values are compared, allowing a difference of 1
+    {
+      cl_ushort epsilon = 1;
+      for (unsigned int i = 0; i < height; ++i)
+      {
+        unsigned int offset = i * width * channelNum; // start of row i
+        for (unsigned int j = 0; j < width; ++j)
+        {
+          for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx)
+          {
+            cl_ushort test = + j * channelNum + planeIdx); // NOTE(review): the right-hand side is truncated in this copy of the file
+            cl_ushort ref = + j * channelNum + planeIdx); // right-hand side truncated in this copy
+            if (abs(test - ref) > epsilon) // operands promote to int, so the subtraction is signed
+            {
+              log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %i, expected value = %i\n", j, i, planeIdx, test, ref);
+              return false;
+            }
+          }
+        }
+      }
+    }
+    break;
+  default: // any other channel type is rejected
+    log_error("DataCompare(): Invalid data format!");
+    return false;
+    break;
+  }
+  return true;
+// Compares a tested 8-bit image against its reference, value by value.
+// Both vectors are indexed as height rows of width * planeNum bytes and
+// must match exactly. For SURFACE_FORMAT_X8R8G8B8 the element at index 3 of
+// every pixel is skipped. `type` is not read by this overload.
+// Returns false after logging the first mismatch; true when all values agree.
+bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<cl_uchar> &dataTest, const std::vector<cl_uchar> &dataExp,
+                 unsigned int width, unsigned int height, unsigned int planeNum )
+{
+  for (unsigned int i = 0; i < height; ++i)
+  {
+    unsigned int offset = i * width * planeNum;
+    for (unsigned int j = 0; j < width; ++j)
+    {
+      for(unsigned planeIdx = 0; planeIdx < planeNum; ++planeIdx)
+      {
+        if (surfaceFormat == SURFACE_FORMAT_X8R8G8B8 && planeIdx == 3)
+          continue;
+        // at() keeps the access bounds-checked if a vector is shorter than width * height * planeNum.
+        cl_uchar test = dataTest.at(offset + j * planeNum + planeIdx);
+        cl_uchar ref = dataExp.at(offset + j * planeNum + planeIdx);
+        if (test != ref)
+        {
+          log_error("Tested image is different than reference (x,y,plane) = (%u,%u,%u), test value = %i, expected value = %i\n",
+            j, i, planeIdx, test, ref);
+          return false;
+        }
+      }
+    }
+  }
+  return true;
+}
+// Queries the clGetImageInfo properties of `object` (format, element size,
+// row pitch, slice pitch, width, height, depth, DX9 media plane) and checks
+// each one against the expected value passed in.
+// Checking continues after a failure so the log lists every problem; the
+// return value is true only when every query succeeded and every value matched.
+// Row and slice pitch: an expected value of 0 requires the reported pitch to
+// be 0; a non-zero expected value is treated as a lower bound.
+bool GetImageInfo( cl_mem object, cl_image_format formatExp, size_t elementSizeExp, size_t rowPitchExp,
+                  size_t slicePitchExp, size_t widthExp, size_t heightExp, size_t depthExp , unsigned int planeExp)
+{
+  bool result = true;
+  // Zero-initialized so the comparison below never reads indeterminate values when the query fails.
+  cl_image_format format = { 0, 0 };
+  if (clGetImageInfo(object, CL_IMAGE_FORMAT, sizeof(cl_image_format), &format, 0) != CL_SUCCESS)
+  {
+    log_error("clGetImageInfo(CL_IMAGE_FORMAT) failed\n");
+    result = false;
+  }
+  if (formatExp.image_channel_order != format.image_channel_order || formatExp.image_channel_data_type != format.image_channel_data_type)
+  {
+    log_error("Value of CL_IMAGE_FORMAT is different than expected\n");
+    result = false;
+  }
+  size_t elementSize = 0;
+  if (clGetImageInfo(object, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elementSize, 0) != CL_SUCCESS)
+  {
+    log_error("clGetImageInfo(CL_IMAGE_ELEMENT_SIZE) failed\n");
+    result = false;
+  }
+  if (elementSizeExp != elementSize)
+  {
+    // %zu: the arguments are size_t, which %i does not match on 64-bit targets.
+    log_error("Value of CL_IMAGE_ELEMENT_SIZE is different than expected (size: %zu, exp size: %zu)\n", elementSize, elementSizeExp);
+    result = false;
+  }
+  size_t rowPitch = 0;
+  if (clGetImageInfo(object, CL_IMAGE_ROW_PITCH, sizeof(size_t), &rowPitch, 0) != CL_SUCCESS)
+  {
+    log_error("clGetImageInfo(CL_IMAGE_ROW_PITCH) failed\n");
+    result = false;
+  }
+  if ((rowPitchExp == 0 && rowPitchExp != rowPitch) || (rowPitchExp > 0 && rowPitchExp > rowPitch))
+  {
+    log_error("Value of CL_IMAGE_ROW_PITCH is different than expected (size: %zu, exp size: %zu)\n", rowPitch, rowPitchExp);
+    result = false;
+  }
+  size_t slicePitch = 0;
+  if (clGetImageInfo(object, CL_IMAGE_SLICE_PITCH, sizeof(size_t), &slicePitch, 0) != CL_SUCCESS)
+  {
+    log_error("clGetImageInfo(CL_IMAGE_SLICE_PITCH) failed\n");
+    result = false;
+  }
+  if ((slicePitchExp == 0 && slicePitchExp != slicePitch) || (slicePitchExp > 0 && slicePitchExp > slicePitch))
+  {
+    log_error("Value of CL_IMAGE_SLICE_PITCH is different than expected (size: %zu, exp size: %zu)\n", slicePitch, slicePitchExp);
+    result = false;
+  }
+  size_t width = 0;
+  if (clGetImageInfo(object, CL_IMAGE_WIDTH, sizeof(size_t), &width, 0) != CL_SUCCESS)
+  {
+    log_error("clGetImageInfo(CL_IMAGE_WIDTH) failed\n");
+    result = false;
+  }
+  if (widthExp != width)
+  {
+    log_error("Value of CL_IMAGE_WIDTH is different than expected (size: %zu, exp size: %zu)\n", width, widthExp);
+    result = false;
+  }
+  size_t height = 0;
+  if (clGetImageInfo(object, CL_IMAGE_HEIGHT, sizeof(size_t), &height, 0) != CL_SUCCESS)
+  {
+    log_error("clGetImageInfo(CL_IMAGE_HEIGHT) failed\n");
+    result = false;
+  }
+  if (heightExp != height)
+  {
+    log_error("Value of CL_IMAGE_HEIGHT is different than expected (size: %zu, exp size: %zu)\n", height, heightExp);
+    result = false;
+  }
+  size_t depth = 0;
+  if (clGetImageInfo(object, CL_IMAGE_DEPTH, sizeof(size_t), &depth, 0) != CL_SUCCESS)
+  {
+    log_error("clGetImageInfo(CL_IMAGE_DEPTH) failed\n");
+    result = false;
+  }
+  if (depthExp != depth)
+  {
+    log_error("Value of CL_IMAGE_DEPTH is different than expected (size: %zu, exp size: %zu)\n", depth, depthExp);
+    result = false;
+  }
+  // 99 is presumably a sentinel no real plane index uses, so a query that writes nothing is detected below.
+  unsigned int plane = 99;
+  size_t paramSize = 0;
+  if (clGetImageInfo(object, CL_IMAGE_DX9_MEDIA_PLANE_KHR, sizeof(unsigned int), &plane, &paramSize) != CL_SUCCESS)
+  {
+    // Message now names the token that is actually queried.
+    log_error("clGetImageInfo(CL_IMAGE_DX9_MEDIA_PLANE_KHR) failed\n");
+    result = false;
+  }
+  if (planeExp != plane)
+  {
+    log_error("Value of CL_IMAGE_DX9_MEDIA_PLANE_KHR is different than expected (plane: %u, exp plane: %u)\n", plane, planeExp);
+    result = false;
+  }
+  return result;
+}
+bool GetMemObjInfo( cl_mem object, cl_dx9_media_adapter_type_khr adapterType,  std::auto_ptr<CSurfaceWrapper> &surface, void *shareHandleExp )  // Checks the DX9 media-sharing mem-object info of `object` against `surface`, `shareHandleExp` and `adapterType`.
+  bool result = true;  // cleared by any failed query or mismatch; checking continues so every problem is logged
+  switch(adapterType)
+  {
+  case CL_ADAPTER_D3D9_KHR:
+    {
+#if defined(_WIN32)
+      cl_dx9_surface_info_khr surfaceInfo;
+      void *surfaceInfo = 0;  // NOTE(review): second declaration of surfaceInfo plus an unconditional return; the #else/#endif that presumably separated the non-Windows branch are not visible in this hunk - confirm against the upstream file
+      return false;
+      size_t paramSize = 0;
+      if(clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR, sizeof(surfaceInfo), &surfaceInfo, &paramSize) != CL_SUCCESS)
+      {
+        log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR) failed\n");  // NOTE(review): the failing call is clGetMemObjectInfo, not clGetImageInfo
+        result = false;
+      }
+#if defined(_WIN32)
+      CD3D9SurfaceWrapper *d3d9Surface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
+      if (*d3d9Surface != surfaceInfo.resource)  // the reported resource must be the wrapped D3D9 surface
+      {
+        log_error("Invalid resource for CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n");
+        result = false;
+      }
+      if (shareHandleExp != surfaceInfo.shared_handle)
+      {
+        log_error("Invalid shared handle for CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n");
+        result = false;
+      }
+      return false;  // NOTE(review): presumably the non-Windows branch of the #if above; as shown it makes the rest of the case unreachable
+      if (paramSize != sizeof(surfaceInfo))  // the returned size must equal the size of the info struct
+      {
+        log_error("Invalid CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR parameter size: %i, expected: %i\n", paramSize, sizeof(surfaceInfo));  // NOTE(review): %i is paired with size_t arguments; %zu would match
+        result = false;
+      }
+      paramSize = 0;  // reset before the second query
+      cl_dx9_media_adapter_type_khr mediaAdapterType;
+      if(clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR, sizeof(mediaAdapterType), &mediaAdapterType, &paramSize) != CL_SUCCESS)
+      {
+        log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR) failed\n");
+        result = false;
+      }
+      if (adapterType != mediaAdapterType)  // NOTE(review): mediaAdapterType is uninitialized if the query above failed
+      {
+        log_error("Invalid media adapter type for CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR\n");
+        result = false;
+      }
+      if (paramSize != sizeof(mediaAdapterType))
+      {
+        log_error("Invalid CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR parameter size: %i, expected: %i\n", paramSize, sizeof(mediaAdapterType));  // NOTE(review): %i is paired with size_t arguments; %zu would match
+        result = false;
+      }
+    }
+    break;
+  default:
+    log_error("GetMemObjInfo(): Unknown adapter type!\n");
+    return false;
+    break;
+  }
+  return result;
+// Verifies the image info and mem-object info of the per-plane images in
+// memObjList, which must hold exactly 2 or 3 objects.
+//   plane 0          : CL_R / CL_UNORM_INT8, width x height
+//   2 objects, plane 1   : CL_RG / CL_UNORM_INT8, (width / 2) x (height / 2)
+//   3 objects, planes 1-2: CL_R / CL_UNORM_INT8, (width / 2) x (height / 2)
+// (presumably the NV12 and YV12 layouts respectively - confirm with callers).
+// Every object is then checked with GetMemObjInfo against `surface` and
+// `sharedHandle`. Returns false on the first failed check, true otherwise.
+bool ImageInfoVerify( cl_dx9_media_adapter_type_khr adapterType, const std::vector<cl_mem> &memObjList, unsigned int width, unsigned int height,
+                     std::auto_ptr<CSurfaceWrapper> &surface, void *sharedHandle)
+{
+  if (memObjList.size() != 2 && memObjList.size() != 3)
+  {
+    log_error("ImageInfoVerify(): Invalid object list parameter\n");
+    return false;
+  }
+  cl_image_format formatPlane;
+  formatPlane.image_channel_data_type = CL_UNORM_INT8;
+  formatPlane.image_channel_order = CL_R;
+  //plane 0 verification
+  if (!GetImageInfo(memObjList[0], formatPlane, sizeof(cl_uchar),
+    width * sizeof(cl_uchar),
+    0,
+    width, height, 0, 0))
+  {
+    log_error("clGetImageInfo failed\n");
+    return false;
+  }
+  switch (memObjList.size())
+  {
+  case 2:
+    {
+      // Single interleaved two-channel plane at half resolution.
+      formatPlane.image_channel_data_type = CL_UNORM_INT8;
+      formatPlane.image_channel_order = CL_RG;
+      if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar) * 2,
+        width * sizeof(cl_uchar),
+        0,
+        width / 2, height / 2, 0, 1))
+      {
+        log_error("clGetImageInfo failed\n");
+        return false;
+      }
+    }
+    break;
+  case 3:
+    {
+      // Two separate single-channel planes at half resolution.
+      if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar),
+        width * sizeof(cl_uchar) / 2,
+        0,
+        width / 2, height / 2, 0, 1))
+      {
+        log_error("clGetImageInfo failed\n");
+        return false;
+      }
+      if (!GetImageInfo(memObjList[2], formatPlane, sizeof(cl_uchar),
+        width * sizeof(cl_uchar) / 2,
+        0,
+        width / 2, height / 2, 0, 2))
+      {
+        log_error("clGetImageInfo failed\n");
+        return false;
+      }
+    }
+    break;
+  default:
+    // Unreachable after the size check above; kept as a defensive fallback.
+    log_error("ImageInfoVerify(): Invalid object list parameter\n");
+    return false;
+    break;
+  }
+  for (size_t i = 0; i < memObjList.size(); ++i)
+  {
+    if (!GetMemObjInfo(memObjList[i], adapterType, surface, sharedHandle))
+    {
+      // %zu: i is size_t, which %i does not match on 64-bit targets.
+      log_error("clGetMemObjInfo(%zu) failed\n", i);
+      return false;
+    }
+  }
+  return true;
+}
+bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType, const cl_image_format imageFormatCheck)  // Returns true when imageFormatCheck is among the CL_MEM_READ_WRITE formats the context reports for imageType.
+  cl_uint imageFormatsNum = 0;
+  cl_int error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, imageType, 0, 0, &imageFormatsNum);  // first call: ask only for the number of formats
+  if(error != CL_SUCCESS)
+  {
+    log_error("clGetSupportedImageFormats failed\n");
+    return false;
+  }
+  if(imageFormatsNum < 1)  // an empty list is treated as an error; it also keeps &imageFormats[0] below valid
+  {
+    log_error("Invalid image format number returned by clGetSupportedImageFormats\n");
+    return false;
+  }
+  std::vector<cl_image_format> imageFormats(imageFormatsNum);
+  error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, imageType, imageFormatsNum, &imageFormats[0], 0);  // second call: fetch the list itself
+  if(error != CL_SUCCESS)
+  {
+    log_error("clGetSupportedImageFormats failed\n");
+    return false;
+  }
+  for(cl_uint i = 0; i < imageFormatsNum; ++i)
+  {
+    if(imageFormats[i].image_channel_data_type == imageFormatCheck.image_channel_data_type
+      && imageFormats[i].image_channel_order == imageFormatCheck.image_channel_order)  // a match needs both data type and channel order
+    {
+      return true;
+    }
+  }
+  return false;  // not found; nothing is logged on this path
+unsigned int ChannelNum( TSurfaceFormat surfaceFormat )  // Maps a surface format to its channel count; logs and returns 0 for a format not handled below.
+  switch(surfaceFormat)
+  {
+    return 1;  // NOTE(review): no case label precedes this return in the visible hunk, so it is unreachable as written - the labels appear to be missing; confirm against the upstream file
+    break;
+  case SURFACE_FORMAT_G16R16:
+    return 2;
+    break;
+    return 3;  // NOTE(review): likewise, no case label is visible for this return
+    break;
+  case SURFACE_FORMAT_A32B32G32R32F:
+  case SURFACE_FORMAT_A16B16G16R16F:
+  case SURFACE_FORMAT_A16B16G16R16:
+    return 4;
+    break;
+  default:
+    log_error("ChannelNum(): unknown surface format!\n");
+    return 0;
+    break;
+  }
+unsigned int PlanesNum( TSurfaceFormat surfaceFormat )  // Maps a surface format to its number of planes; logs and returns 0 for a format not handled below.
+  switch(surfaceFormat)
+  {
+  case SURFACE_FORMAT_G16R16:
+  case SURFACE_FORMAT_A32B32G32R32F:
+  case SURFACE_FORMAT_A16B16G16R16F:
+  case SURFACE_FORMAT_A16B16G16R16:
+    return 1;  // the formats listed above are single-plane
+    break;
+    return 2;  // NOTE(review): no case label is visible for this return in the hunk, so it is unreachable as written - confirm against the upstream file
+    break;
+    return 3;  // NOTE(review): likewise, no case label is visible for this return
+    break;
+  default:
+    log_error("PlanesNum(): unknown surface format!\n");
+    return 0;
+    break;
+  }
+#if defined(_WIN32)
+D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat)  // Maps a TSurfaceFormat to the corresponding D3DFORMAT; logs and falls back to D3DFMT_R32F for an unknown format.
+  switch(surfaceFormat)
+  {
+    return D3DFMT_R32F;  // NOTE(review): most returns in this switch have no visible case label in the hunk and are unreachable as written - the labels appear to be missing; confirm against the upstream file
+    break;
+    return D3DFMT_R16F;
+    break;
+    return D3DFMT_L16;
+    break;
+    return D3DFMT_A8;
+    break;
+    return D3DFMT_L8;
+    break;
+    return D3DFMT_G32R32F;
+    break;
+    return D3DFMT_G16R16F;
+    break;
+  case SURFACE_FORMAT_G16R16:
+    return D3DFMT_G16R16;
+    break;
+    return D3DFMT_A8L8;
+    break;
+  case SURFACE_FORMAT_A32B32G32R32F:
+    return D3DFMT_A32B32G32R32F;
+    break;
+  case SURFACE_FORMAT_A16B16G16R16F:
+    return D3DFMT_A16B16G16R16F;
+    break;
+  case SURFACE_FORMAT_A16B16G16R16:
+    return D3DFMT_A16B16G16R16;
+    break;
+    return D3DFMT_A8B8G8R8;
+    break;
+    return D3DFMT_X8B8G8R8;
+    break;
+    return D3DFMT_A8R8G8B8;
+    break;
+    return D3DFMT_X8R8G8B8;
+    break;
+    return static_cast<D3DFORMAT>(MAKEFOURCC('N', 'V', '1', '2'));  // the value is built from the FOURCC code 'NV12' and cast to D3DFORMAT
+    break;
+    return static_cast<D3DFORMAT>(MAKEFOURCC('Y', 'V', '1', '2'));  // same construction for 'YV12'
+    break;
+  default:
+    log_error("SurfaceFormatToD3D(): unknown surface format!\n");
+    return D3DFMT_R32F;  // fallback value; callers cannot tell it apart from a real R32F mapping
+    break;
+  }
+bool DeviceCreate( cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr<CDeviceWrapper> &device )  // Creates the device wrapper for adapterType into `device` and returns its Status(); logs and returns false for an unknown adapter type.
+  switch (adapterType)
+  {
+#if defined(_WIN32)
+  case CL_ADAPTER_D3D9_KHR:
+    device = std::auto_ptr<CDeviceWrapper>(new CD3D9Wrapper());  // auto_ptr assignment releases whatever `device` held before
+    break;
+    device = std::auto_ptr<CDeviceWrapper>(new CD3D9ExWrapper());  // NOTE(review): no case label is visible for this assignment or the next one; as written both are unreachable - confirm against the upstream file
+    break;
+    device = std::auto_ptr<CDeviceWrapper>(new CDXVAWrapper());
+    break;
+  default:
+    log_error("DeviceCreate(): Unknown adapter type!\n");
+    return false;
+    break;
+  }
+  return device->Status();  // reached only after a wrapper has been assigned above
+bool SurfaceFormatCheck( cl_dx9_media_adapter_type_khr adapterType, const CDeviceWrapper &device, TSurfaceFormat surfaceFormat )  // Returns true when the D3D9 object behind `device` accepts surfaceFormat as a surface format for the current display mode.
+  switch (adapterType)
+  {
+#if defined(_WIN32)
+  case CL_ADAPTER_D3D9_KHR:
+    {
+      D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
+      LPDIRECT3D9 d3d9 = static_cast<LPDIRECT3D9>(device.D3D());
+      D3DDISPLAYMODE d3ddm;
+      d3d9->GetAdapterDisplayMode(device.AdapterIdx(), &d3ddm);  // NOTE(review): the result is not checked, so d3ddm may be uninitialized if this call fails
+      if( FAILED(d3d9->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, d3ddm.Format, 0, D3DRTYPE_SURFACE, d3dFormat)) )  // NOTE(review): queries D3DADAPTER_DEFAULT while the display mode above came from device.AdapterIdx() - confirm this is intended
+        return false;
+    }
+    break;
+  default:
+    log_error("SurfaceFormatCheck(): Unknown adapter type!\n");
+    return false;
+    break;
+  }
+  return true;
+bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format)  // Fills `format` with the channel order and data type for surfaceFormat; logs and returns false (leaving `format` untouched) for an unknown format.
+  switch(surfaceFormat)
+  {
+    format.image_channel_order = CL_R;  // NOTE(review): most assignment groups in this switch have no visible case label in the hunk and are unreachable as written - the labels appear to be missing; confirm against the upstream file
+    format.image_channel_data_type = CL_FLOAT;
+    break;
+    format.image_channel_order = CL_R;
+    format.image_channel_data_type = CL_HALF_FLOAT;
+    break;
+    format.image_channel_order = CL_R;
+    format.image_channel_data_type = CL_UNORM_INT16;
+    break;
+    format.image_channel_order = CL_A;
+    format.image_channel_data_type = CL_UNORM_INT8;
+    break;
+    format.image_channel_order = CL_R;
+    format.image_channel_data_type = CL_UNORM_INT8;
+    break;
+    format.image_channel_order = CL_RG;
+    format.image_channel_data_type = CL_FLOAT;
+    break;
+    format.image_channel_order = CL_RG;
+    format.image_channel_data_type = CL_HALF_FLOAT;
+    break;
+  case SURFACE_FORMAT_G16R16:
+    format.image_channel_order = CL_RG;
+    format.image_channel_data_type = CL_UNORM_INT16;
+    break;
+    format.image_channel_order = CL_RG;
+    format.image_channel_data_type = CL_UNORM_INT8;
+    break;
+  case SURFACE_FORMAT_A32B32G32R32F:
+    format.image_channel_order = CL_RGBA;
+    format.image_channel_data_type = CL_FLOAT;
+    break;
+  case SURFACE_FORMAT_A16B16G16R16F:
+    format.image_channel_order = CL_RGBA;
+    format.image_channel_data_type = CL_HALF_FLOAT;
+    break;
+  case SURFACE_FORMAT_A16B16G16R16:
+    format.image_channel_order = CL_RGBA;
+    format.image_channel_data_type = CL_UNORM_INT16;
+    break;
+    format.image_channel_order = CL_RGBA;
+    format.image_channel_data_type = CL_UNORM_INT8;
+    break;
+    format.image_channel_order = CL_RGBA;
+    format.image_channel_data_type = CL_UNORM_INT8;
+    break;
+    format.image_channel_order = CL_BGRA;
+    format.image_channel_data_type = CL_UNORM_INT8;
+    break;
+    format.image_channel_order = CL_BGRA;
+    format.image_channel_data_type = CL_UNORM_INT8;
+    break;
+    format.image_channel_order = CL_R;
+    format.image_channel_data_type = CL_UNORM_INT8;
+    break;
+    format.image_channel_order = CL_R;
+    format.image_channel_data_type = CL_UNORM_INT8;
+    break;
+  default:
+    log_error("SurfaceFormatToOCL(): Unknown surface format!\n");
+    return false;
+    break;
+  }
+  return true;  // reached only when one of the cases above filled `format`
+void SurfaceFormatToString( TSurfaceFormat surfaceFormat, std::string &str )  // Writes a short printable name for surfaceFormat into `str`; logs and writes "unknown" for a format not handled below.
+  switch(surfaceFormat)
+  {
+    str = "R32F";  // NOTE(review): most assignments in this switch have no visible case label in the hunk and are unreachable as written - the labels appear to be missing; confirm against the upstream file
+    break;
+    str = "R16F";
+    break;
+    str = "L16";
+    break;
+    str = "A8";
+    break;
+    str = "L8";
+    break;
+    str = "G32R32F";
+    break;
+    str = "G16R16F";
+    break;
+  case SURFACE_FORMAT_G16R16:
+    str = "G16R16";
+    break;
+    str = "A8L8";
+    break;
+  case SURFACE_FORMAT_A32B32G32R32F:
+    str = "A32B32G32R32F";
+    break;
+  case SURFACE_FORMAT_A16B16G16R16F:
+    str = "A16B16G16R16F";
+    break;
+  case SURFACE_FORMAT_A16B16G16R16:
+    str = "A16B16G16R16";
+    break;
+    str = "A8B8G8R8";
+    break;
+    str = "X8B8G8R8";
+    break;
+    str = "A8R8G8B8";
+    break;
+    str = "X8R8G8B8";
+    break;
+    str = "NV12";
+    break;
+    str = "YV12";
+    break;
+  default:
+    log_error("SurfaceFormatToString(): unknown surface format!\n");
+    str = "unknown";
+    break;
+  }
+bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType, unsigned int width, unsigned int height, TSurfaceFormat surfaceFormat,
+                        CDeviceWrapper &device, std::auto_ptr<CSurfaceWrapper> &surface, bool sharedHandle, void **objectSharedHandle)  // Creates a width x height surface of surfaceFormat on `device`, stores its wrapper in `surface`; returns false (after logging) on failure or unknown adapter type.
+  switch (adapterType)
+  {
+#if defined(_WIN32)
+  case CL_ADAPTER_D3D9_KHR:
+    {
+      surface = std::auto_ptr<CD3D9SurfaceWrapper>(new CD3D9SurfaceWrapper);  // the wrapper is owned by `surface` from here on, including on the failure path below
+      CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
+      HRESULT hr = 0;
+      D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
+      LPDIRECT3DDEVICE9 d3d9Device = (LPDIRECT3DDEVICE9)device.Device();
+      hr = d3d9Device->CreateOffscreenPlainSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface),
+        sharedHandle ? objectSharedHandle: 0);  // the shared-handle out-pointer is passed only when sharing was requested
+      if ( FAILED(hr))
+      {
+        log_error("CreateOffscreenPlainSurface failed\n");
+        return false;
+      }
+    }
+    break;
+    {  // NOTE(review): no case label is visible for this block or the next one; as written both are unreachable - confirm against the upstream file
+      surface = std::auto_ptr<CD3D9SurfaceWrapper>(new CD3D9SurfaceWrapper);
+      CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
+      HRESULT hr = 0;
+      D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
+      LPDIRECT3DDEVICE9EX d3d9ExDevice = (LPDIRECT3DDEVICE9EX)device.Device();
+      hr = d3d9ExDevice->CreateOffscreenPlainSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface),
+        sharedHandle ? objectSharedHandle: 0);
+      if ( FAILED(hr))
+      {
+        log_error("CreateOffscreenPlainSurface failed\n");
+        return false;
+      }
+    }
+    break;
+    {
+      surface = std::auto_ptr<CD3D9SurfaceWrapper>(new CD3D9SurfaceWrapper);
+      CD3D9SurfaceWrapper *d3dSurface = static_cast<CD3D9SurfaceWrapper *>(surface.get());
+      HRESULT hr = 0;
+      D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat);
+      IDXVAHD_Device *dxvaDevice = (IDXVAHD_Device *)device.Device();
+      hr = dxvaDevice->CreateVideoSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, 0,
+        DXVAHD_SURFACE_TYPE_VIDEO_INPUT,  1, &(*d3dSurface), sharedHandle ? objectSharedHandle: 0);  // this block creates a video-input surface through the DXVA-HD device instead of an offscreen plain surface
+      if ( FAILED(hr))
+      {
+        log_error("CreateVideoSurface failed\n");
+        return false;
+      }
+    }
+    break;
+  default:
+    log_error("MediaSurfaceCreate(): Unknown adapter type!\n");
+    return false;
+    break;
+  }
+  return true;
+cl_ushort float2half_rte( float f )  // Converts a float to 16-bit half-precision bits; per the name and the halfway-case comment below, rounding is to nearest even.
+  union{ float f; cl_uint u; } u = {f};  // gives access to the raw bits of f
+  cl_uint sign = (u.u >> 16) & 0x8000;  // float sign bit (bit 31) moved to bit 15 of the result
+  float x = fabsf(f);
+  //Nan
+  if( x != x )  // only NaN compares unequal to itself
+  {
+    u.u >>= (24-11);
+    u.u &= 0x7fff;
+    u.u |= 0x0200;      //silence the NaN
+    return u.u | sign;
+  }
+  // overflow
+  if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) )
+    return 0x7c00 | sign;  // 0x7c00: all exponent bits set, zero mantissa
+  // underflow
+  if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) )
+    return sign;    // The halfway case can return 0x0001 or 0. 0 is even.
+  // very small
+  if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
+    return sign | 1;  // magnitudes between the underflow bound and 1.5 * 2^-24 map to the bit pattern 1
+  // half denormal
+  if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+  {
+    u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125);  // presumably scales x so the float multiply itself rounds it into the low bits of u.u - verify
+    return sign | u.u;
+  }
+  u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13);
+  u.u &= 0x7f800000;  // keep only the exponent field of the scaled value
+  x += u.f;  // NOTE(review): this add/subtract pair appears to round x to half precision using float arithmetic; statement order matters
+  u.f = x - u.f;
+  u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);  // rescale the float exponent before the shift below
+  return (u.u >> (24-11)) | sign;  // shift right by 13 to move the remaining bits into the 16-bit result
+cl_ushort float2half_rtz( float f )  // Converts a float to 16-bit half-precision bits; per the name and the truncating paths below, rounding is toward zero.
+  union{ float f; cl_uint u; } u = {f};  // gives access to the raw bits of f
+  cl_uint sign = (u.u >> 16) & 0x8000;  // float sign bit (bit 31) moved to bit 15 of the result
+  float x = fabsf(f);
+  //Nan
+  if( x != x )  // only NaN compares unequal to itself
+  {
+    u.u >>= (24-11);
+    u.u &= 0x7fff;
+    u.u |= 0x0200;      //silence the NaN
+    return u.u | sign;
+  }
+  // overflow
+  if( x >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) )
+  {
+    if( x == INFINITY )
+      return 0x7c00 | sign;  // only a true infinity maps to the all-ones-exponent pattern
+    return 0x7bff | sign;  // any finite overflow returns 0x7bff, one below 0x7c00
+  }
+  // underflow
+  if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
+    return sign;    // magnitudes below 2^-24 return a signed zero
+  // half denormal
+  if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
+  {
+    x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
+    return (cl_ushort)((int) x | sign);  // the (int) cast truncates the scaled value
+  }
+  u.u &= 0xFFFFE000U;  // clear the low 13 mantissa bits, i.e. truncate
+  u.u -= 0x38000000U;  // subtracts 112 from the exponent field (112 << 23), presumably the float-to-half bias difference
+  return (u.u >> (24-11)) | sign;
+cl_int deviceExistForCLTest(cl_platform_id platform,  // Asks the platform for preferred devices for the given media adapter and records a sub-result in `result` on failure; returns the raw CL error code.
+     cl_dx9_media_adapter_type_khr media_adapters_type,
+     void *media_adapters,
+     CResult &result,
+     TSharedHandleType sharedHandle /*default SHARED_HANDLE_DISABLED, see the declaration in utils.h*/
+     )
+    cl_int _error;
+    cl_uint devicesAllNum = 0;  // receives the device count; only the error code is used afterwards
+    std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";  // used only in the skip message below
+    std::string adapterStr;
+    AdapterToString(media_adapters_type, adapterStr);
+    _error = clGetDeviceIDsFromDX9MediaAdapterKHR(platform, 1,
+        &media_adapters_type, &media_adapters, CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesAllNum);  // count-only query: no device array is requested
+    if (_error != CL_SUCCESS)
+    {
+        if(_error != CL_DEVICE_NOT_FOUND)  // any failure other than "no device" is a test error
+        {
+           log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(_error));
+           result.ResultSub(CResult::TEST_ERROR);
+        }
+        else
+        {
+          log_info("Skipping test case, device type is not supported by a device (adapter type: %s, shared handle: %s)\n", adapterStr.c_str(), sharedHandleStr.c_str());
+          result.ResultSub(CResult::TEST_NOTSUPPORTED);  // CL_DEVICE_NOT_FOUND is reported as "not supported", not as an error
+        }
+    }
+    return _error;  // CL_SUCCESS when the query succeeded
diff --git a/test_extensions/media_sharing/utils.h b/test_extensions/media_sharing/utils.h
new file mode 100644
index 0000000..38908ab
--- /dev/null
+++ b/test_extensions/media_sharing/utils.h
@@ -0,0 +1,174 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef __UTILS_KHR_MEDIA_H
+#define __UTILS_KHR_MEDIA_H
+#include <string>
+#include <iostream>
+#include <memory>
+#include <vector>
+#include "wrappers.h"
+#include "CL/cl_dx9_media_sharing.h"
+#include "harness/typeWrappers.h"
+extern clGetDeviceIDsFromDX9MediaAdapterKHR_fn clGetDeviceIDsFromDX9MediaAdapterKHR;  // function pointers for the four extension entry points; declared here, defined in another translation unit
+extern clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR;
+extern clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR;
+extern clEnqueueReleaseDX9MediaSurfacesKHR_fn clEnqueueReleaseDX9MediaSurfacesKHR;
+extern cl_platform_id gPlatformIDdetected;  // globals declared here, defined in another translation unit
+extern cl_device_id gDeviceIDdetected;
+extern cl_device_type gDeviceTypeSelected;
+#define NL "\n"
+enum TSurfaceFormat  // NOTE(review): the enumerator lists of these three enums are not visible in this hunk
+enum TContextFuncType
+enum TSharedHandleType
+class CResult {  // Holds a test result (_result) and the most recently reported sub-result (_resultLast).
+  enum TTestResult {  // NOTE(review): enumerators are not visible in this hunk; utils.cpp uses TEST_ERROR and TEST_NOTSUPPORTED
+  };
+  CResult();
+  ~CResult();
+  void ResultSub(TTestResult result);  // reports one sub-result
+  TTestResult ResultLast() const;
+  int Result() const;
+  TTestResult _result;
+  TTestResult _resultLast;
+void FunctionContextCreateToString(TContextFuncType contextCreateFunction, std::string &contextFunction);
+void AdapterToString(cl_dx9_media_adapter_type_khr adapterType, std::string &adapter);
+cl_context_info AdapterTypeToContextInfo(cl_dx9_media_adapter_type_khr adapterType);
+// YUV helpers: generate, upload, read back and compare NV12 / YV12 data
+void YUVGenerateNV12(std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height,
+                     cl_uchar valueMin, cl_uchar valueMax, double valueAdd = 0.0);
+void YUVGenerateYV12(std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height,
+                     cl_uchar valueMin, cl_uchar valueMax, double valueAdd = 0.0);
+bool YUVGenerate(TSurfaceFormat surfaceFormat, std::vector<cl_uchar> &yuv, unsigned int width, unsigned int height,
+                 cl_uchar valueMin, cl_uchar valueMax, double valueAdd = 0.0);
+bool YUVSurfaceSetNV12(std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv,
+                       unsigned int width, unsigned int height);
+bool YUVSurfaceSetYV12(std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv,
+                       unsigned int width, unsigned int height);
+bool YUVSurfaceSet(TSurfaceFormat surfaceFormat, std::auto_ptr<CSurfaceWrapper> &surface, const std::vector<cl_uchar> &yuv,
+                   unsigned int width, unsigned int height);
+bool YUVSurfaceGetNV12(std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv,
+                       unsigned int width, unsigned int height);
+bool YUVSurfaceGetYV12(std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv,
+                       unsigned int width, unsigned int height);
+bool YUVSurfaceGet(TSurfaceFormat surfaceFormat, std::auto_ptr<CSurfaceWrapper> &surface, std::vector<cl_uchar> &yuv,
+                   unsigned int width, unsigned int height);
+bool YUVCompareNV12(const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef,
+                    unsigned int width, unsigned int height);
+bool YUVCompareYV12(const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef,
+                    unsigned int width, unsigned int height);
+bool YUVCompare(TSurfaceFormat surfaceFormat, const std::vector<cl_uchar> &yuvTest, const std::vector<cl_uchar> &yuvRef,
+                unsigned int width, unsigned int height);
+// Helpers for the non-YUV surface formats: data generation/comparison, image and mem-object info checks, format mapping
+void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<float> &data, unsigned int width, unsigned int height,
+                  unsigned int channelNum, float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f);
+void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<cl_half> &data, unsigned int width, unsigned int height,
+                  unsigned int channelNum, float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f);
+void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector<cl_uchar> &data, unsigned int width, unsigned int height,
+                  unsigned int channelNum, float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f);
+bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<cl_float> &dataTest, const std::vector<cl_float> &dataExp,
+                 unsigned int width, unsigned int height, unsigned int channelNum);
+bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<cl_half> &dataTest, const std::vector<cl_half> &dataExp,
+                 unsigned int width, unsigned int height, unsigned int channelNum);
+bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector<cl_uchar> &dataTest, const std::vector<cl_uchar> &dataExp,
+                 unsigned int width, unsigned int height, unsigned int channelNum);  // the definition in utils.cpp names this last parameter planeNum
+bool GetImageInfo(cl_mem object, cl_image_format formatExp, size_t elementSizeExp,
+                  size_t rowPitchExp, size_t slicePitchExp, size_t widthExp,
+                  size_t heightExp, size_t depthExp, unsigned int planeExp);
+bool GetMemObjInfo(cl_mem object, cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr<CSurfaceWrapper> &surface, void *shareHandleExp);
+bool ImageInfoVerify(cl_dx9_media_adapter_type_khr adapterType, const std::vector<cl_mem> &memObjList, unsigned int width, unsigned int height,
+                     std::auto_ptr<CSurfaceWrapper> &surface, void *sharedHandle);
+bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType, const cl_image_format imageFormatCheck);
+unsigned int ChannelNum(TSurfaceFormat surfaceFormat);
+unsigned int PlanesNum(TSurfaceFormat surfaceFormat);
+#if defined(_WIN32)  // NOTE(review): the matching #endif is not visible in this hunk; presumably only the D3DFORMAT declaration below is Windows-only - confirm
+D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat);
+bool DeviceCreate(cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr<CDeviceWrapper> &device);
+bool SurfaceFormatCheck(cl_dx9_media_adapter_type_khr adapterType, const CDeviceWrapper &device, TSurfaceFormat surfaceFormat);
+bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format);
+void SurfaceFormatToString(TSurfaceFormat surfaceFormat, std::string &str );
+bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType, unsigned int width, unsigned int height, TSurfaceFormat surfaceFormat,
+                      CDeviceWrapper &device, std::auto_ptr<CSurfaceWrapper> &surface, bool sharedHandle, void **objectSharedHandle);
+// float <-> half conversion helpers, imported from the image helpers
+cl_ushort float2half_rte( float f );
+cl_ushort float2half_rtz( float f );
+cl_ushort convert_float_to_half( float f );
+float convert_half_to_float( unsigned short halfValue );
+int DetectFloatToHalfRoundingMode( cl_command_queue );
+cl_int deviceExistForCLTest(cl_platform_id platform,cl_dx9_media_adapter_type_khr media_adapters_type,void *media_adapters,CResult &result,TSharedHandleType sharedHandle=SHARED_HANDLE_DISABLED);  // sharedHandle defaults to SHARED_HANDLE_DISABLED
+#endif  // __UTILS_KHR_MEDIA_H
diff --git a/test_extensions/media_sharing/wrappers.cpp b/test_extensions/media_sharing/wrappers.cpp
new file mode 100644
index 0000000..e7eb5b2
--- /dev/null
+++ b/test_extensions/media_sharing/wrappers.cpp
@@ -0,0 +1,562 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "wrappers.h"
+#include "harness/errorHelpers.h"
+LPCTSTR CDeviceWrapper::WINDOW_TITLE = _T( "cl_khr_dx9_media_sharing" );  // presumably the helper window's caption; the CreateWindow arguments that would use it are not visible here
+const int CDeviceWrapper::WINDOW_WIDTH = 256;  // returned by WindowWidth(); also used as the back-buffer width in Init()
+const int CDeviceWrapper::WINDOW_HEIGHT = 256;  // returned by WindowHeight(); also used as the back-buffer height in Init()
+CDeviceWrapper::TAccelerationType CDeviceWrapper::accelerationType = CDeviceWrapper::ACCELERATION_HW;  // class-wide setting, read and written through the AccelerationType() overloads
+#if defined(_WIN32)
+const unsigned int CDXVAWrapper::VIDEO_FPS = 60;
+#if defined(_WIN32)
+static LRESULT WINAPI WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)  // Window procedure installed through the WNDCLASSEX initializer in WindowInit().
+  switch(msg)
+  {
+  case WM_DESTROY:  // window is being destroyed: post a quit message with exit code 0
+    PostQuitMessage(0);
+    return 0;
+  case WM_PAINT:  // nothing is drawn here; a null rectangle validates the whole client area
+    ValidateRect(hWnd, 0);
+    return 0;
+  default:
+    break;
+  }
+  return DefWindowProc(hWnd, msg, wParam, lParam);  // all other messages get the default handling
+#if defined(_WIN32)
+void CDeviceWrapper::WindowInit()
+#if defined(_WIN32)
+  _hInstance = GetModuleHandle(NULL);
+  static WNDCLASSEX wc =
+  {
+    sizeof(WNDCLASSEX),
+    WndProc,
+    0L,
+    0L,
+    _hInstance,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL
+  };
+  RegisterClassEx(&wc);
+  _hWnd = CreateWindow(
+    0, 0,
+    NULL,
+    NULL,
+    wc.hInstance,
+    NULL);
+  if (!_hWnd)
+  {
+    log_error("Failed to create window");
+    return;
+  }
+  ShowWindow(_hWnd,SW_SHOWDEFAULT);
+  UpdateWindow(_hWnd);
+void CDeviceWrapper::WindowDestroy()  // Destroys the helper window if one exists and clears the handle.
+#if defined(_WIN32)
+  if (_hWnd)
+    DestroyWindow(_hWnd);
+  _hWnd = NULL;  // cleared unconditionally, so a repeated call is a no-op
+#if defined(_WIN32)
+HWND CDeviceWrapper::WindowHandle() const  // Handle of the helper window; NULL if creation failed or after WindowDestroy().
+  return _hWnd;
+int CDeviceWrapper::WindowWidth() const  // Fixed window width (WINDOW_WIDTH).
+  return WINDOW_WIDTH;
+int CDeviceWrapper::WindowHeight() const  // Fixed window height (WINDOW_HEIGHT).
+  return WINDOW_HEIGHT;
+CDeviceWrapper::TAccelerationType CDeviceWrapper::AccelerationType()  // Getter for the class-wide acceleration type.
+  return accelerationType;
+void CDeviceWrapper::AccelerationType( TAccelerationType accelerationTypeNew )  // Setter for the class-wide acceleration type (a static member, so it affects every wrapper).
+  accelerationType = accelerationTypeNew;
+  WindowDestroy();
+#if defined(_WIN32)
+_d3d9(NULL), _d3dDevice(NULL), _status(DEVICE_PASS), _adapterIdx(0), _adapterFound(false)  // member-initializer list; the constructor's signature line is not visible here
+  WindowInit();  // create the helper window first; Init() later requires its handle
+  _d3d9 = Direct3DCreate9(D3D_SDK_VERSION);
+  if (!_d3d9)
+  {
+    log_error("Direct3DCreate9 failed\n");
+    _status = DEVICE_FAIL;  // AdapterNext() and Init() bail out unless _status is DEVICE_PASS
+  }
+  Destroy();  // NOTE(review): from here on this looks like the destructor body (its signature line is not visible) - confirm
+  if(_d3d9)
+    _d3d9->Release();
+  _d3d9 = 0;
+void CD3D9Wrapper::Destroy()  // Releases the D3D9 device, if any, and clears the pointer; the IDirect3D9 object is left alone.
+  if (_d3dDevice)
+    _d3dDevice->Release();
+  _d3dDevice = 0;
+cl_int CD3D9Wrapper::Init()  // Creates the D3D9 device on the current adapter. Returns true (nonzero) on success and false (0) on every failure, matching the !Init() test in AdapterNext().
+  if (!WindowHandle())
+  {
+    log_error("D3D9: Window is not created\n");
+    _status = DEVICE_FAIL;
+    return false;  // report failure as 0 so a boolean test of the result sees it; the status code is kept in _status
+  }
+  if(!_d3d9 || DEVICE_PASS  != _status || !_adapterFound)
+    return false;  // nothing to initialize: no D3D object, an earlier failure, or no adapter selected
+  _d3d9->GetAdapterDisplayMode(_adapterIdx - 1, &_d3ddm);  // _adapterIdx is one past the current adapter, hence the - 1
+  ZeroMemory(&d3dParams, sizeof(d3dParams));
+  d3dParams.Windowed = TRUE;
+  d3dParams.BackBufferCount = 1;
+  d3dParams.SwapEffect = D3DSWAPEFFECT_DISCARD;
+  d3dParams.hDeviceWindow = WindowHandle();
+  d3dParams.BackBufferWidth = WindowWidth();
+  d3dParams.BackBufferHeight = WindowHeight();
+  d3dParams.BackBufferFormat = _d3ddm.Format;  // back buffer uses the adapter's current display format
+  if ( FAILED( _d3d9->CreateDevice( _adapterIdx - 1, D3DDEVTYPE_HAL, WindowHandle(),
+    processingType, &d3dParams, &_d3dDevice) ) )
+  {
+    log_error("CreateDevice failed\n");
+    _status = DEVICE_FAIL;
+    return false;  // same convention as above: 0 means failure, details are in _status
+  }
+  _d3dDevice->BeginScene();
+  _d3dDevice->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);  // clear the render target once (color argument 0)
+  _d3dDevice->EndScene();
+  return true;
+void * CD3D9Wrapper::D3D() const  // The IDirect3D9 interface pointer, returned as void*.
+  return _d3d9;
+void *CD3D9Wrapper::Device() const  // The D3D9 device pointer, returned as void*; NULL when no device is currently created.
+  return _d3dDevice;
+D3DFORMAT CD3D9Wrapper::Format()  // Format of the display mode that Init() fetched with GetAdapterDisplayMode().
+  return _d3ddm.Format;
+  return _adapter;  // NOTE(review): body of an accessor whose signature line is not visible here
+TDeviceStatus CD3D9Wrapper::Status() const  // Current wrapper status.
+  return _status;
+bool CD3D9Wrapper::AdapterNext()  // Advances to the next adapter that reports HAL caps and recreates the device on it; returns whether that adapter is usable.
+  if (DEVICE_PASS != _status)
+    return false;  // an earlier failure (or not-supported state) stops the enumeration
+  _adapterFound = false;
+  for(; _adapterIdx < _d3d9->GetAdapterCount();)
+  {
+    ++_adapterIdx;  // _adapterIdx is kept one past the adapter being probed; the D3D ordinal is _adapterIdx - 1
+    D3DCAPS9 caps;
+    if (FAILED(_d3d9->GetDeviceCaps(_adapterIdx - 1, D3DDEVTYPE_HAL, &caps)))
+      continue;  // no HAL caps for this adapter: try the next one
+    if(FAILED(_d3d9->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter)))
+    {
+      log_error("D3D9: GetAdapterIdentifier failed\n");
+      _status = DEVICE_FAIL;
+      return false;
+    }
+    _adapterFound = true;  // must be set before Init(), which refuses to run without a selected adapter
+    Destroy();  // drop any device created for a previous adapter
+    if(!Init())  // the result is used as a boolean: zero means failure
+    {
+      _status = DEVICE_FAIL;
+      _adapterFound = false;
+    }
+    break;  // stop at the first adapter with HAL caps, whether or not Init() succeeded
+  }
+  return _adapterFound;
+unsigned int CD3D9Wrapper::AdapterIdx() const  // D3D ordinal of the current adapter.
+  return _adapterIdx - 1;  // _adapterIdx is unsigned and starts at 0, so this wraps around if called before the first AdapterNext()
+_d3d9Ex(NULL), _d3dDeviceEx(NULL), _status(DEVICE_PASS), _adapterIdx(0), _adapterFound(false)  // member-initializer list; the constructor's signature line is not visible here
+  WindowInit();  // create the helper window first; Init() later requires its handle
+  HRESULT result = Direct3DCreate9Ex(D3D_SDK_VERSION, &_d3d9Ex);
+  if (FAILED(result) || !_d3d9Ex)
+  {
+    log_error("Direct3DCreate9Ex failed\n");
+    _status = DEVICE_FAIL;  // AdapterNext() and Init() bail out once _status is DEVICE_FAIL
+  }
+  Destroy();  // NOTE(review): from here on this looks like the destructor body (its signature line is not visible) - confirm
+  if(_d3d9Ex)
+    _d3d9Ex->Release();
+  _d3d9Ex = 0;
+void * CD3D9ExWrapper::D3D() const  // The IDirect3D9Ex interface pointer, returned as void*.
+  return _d3d9Ex;
+void *CD3D9ExWrapper::Device() const  // The D3D9Ex device pointer, returned as void*; NULL when no device is currently created.
+  return _d3dDeviceEx;
+D3DFORMAT CD3D9ExWrapper::Format()  // NOTE(review): nothing visible in this chunk writes _d3ddmEx - confirm it is initialized before this is called
+  return _d3ddmEx.Format;
+  return _adapter;  // NOTE(review): body of an accessor whose signature line is not visible here
+cl_int CD3D9ExWrapper::Init()  // Creates the D3D9Ex device on the current adapter. Returns DEVICE_PASS on success and DEVICE_FAIL on every failure path.
+  if (!WindowHandle())
+  {
+    log_error("D3D9EX: Window is not created\n");
+    _status = DEVICE_FAIL;
+    return DEVICE_FAIL;
+  }
+  if(!_d3d9Ex || DEVICE_FAIL == _status || !_adapterFound)
+    return DEVICE_FAIL;  // no D3D object, an earlier failure, or no adapter selected; _status itself is left unchanged here
+  RECT rect;
+  GetClientRect(WindowHandle(),&rect);  // NOTE(review): rect is not read anywhere in the visible code
+  ZeroMemory(&d3dParams, sizeof(d3dParams));
+  d3dParams.Windowed = TRUE;
+  d3dParams.SwapEffect = D3DSWAPEFFECT_FLIP;
+  d3dParams.BackBufferFormat = D3DFMT_X8R8G8B8;  // fixed format, unlike CD3D9Wrapper::Init() which uses the display-mode format
+  d3dParams.BackBufferWidth = WindowWidth();
+  d3dParams.BackBufferHeight = WindowHeight();
+  d3dParams.BackBufferCount = 1;
+  d3dParams.hDeviceWindow = WindowHandle();
+  if ( FAILED( _d3d9Ex->CreateDeviceEx( _adapterIdx - 1, D3DDEVTYPE_HAL, WindowHandle(),
+    processingType, &d3dParams, NULL, &_d3dDeviceEx) ) )
+  {
+    log_error("CreateDeviceEx failed\n");
+    _status = DEVICE_FAIL;
+    return DEVICE_FAIL;
+  }
+  _d3dDeviceEx->BeginScene();
+  _d3dDeviceEx->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);  // clear the render target once (color argument 0)
+  _d3dDeviceEx->EndScene();
+  return DEVICE_PASS;
+void CD3D9ExWrapper::Destroy()  // Releases the D3D9Ex device, if any, and clears the pointer; the IDirect3D9Ex object is left alone.
+  if (_d3dDeviceEx)
+    _d3dDeviceEx->Release();
+  _d3dDeviceEx = 0;
+TDeviceStatus CD3D9ExWrapper::Status() const  // Current wrapper status.
+  return _status;
+bool CD3D9ExWrapper::AdapterNext()  // Advances to the next adapter that reports HAL caps and recreates the device on it; returns true only if the device was created.
+  if (DEVICE_FAIL == _status)
+    return false;  // an earlier failure stops the enumeration
+  _adapterFound = false;
+  for(; _adapterIdx < _d3d9Ex->GetAdapterCount();)
+  {
+    ++_adapterIdx;  // _adapterIdx is kept one past the adapter being probed; the D3D ordinal is _adapterIdx - 1
+    D3DCAPS9 caps;
+    if (FAILED(_d3d9Ex->GetDeviceCaps(_adapterIdx - 1, D3DDEVTYPE_HAL, &caps)))
+      continue;  // no HAL caps for this adapter: try the next one
+    if(FAILED(_d3d9Ex->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter)))
+    {
+      log_error("D3D9EX: GetAdapterIdentifier failed\n");
+      _status = DEVICE_FAIL;
+      return false;
+    }
+    _adapterFound = true;  // must be set before Init(), which refuses to run without a selected adapter
+    Destroy();  // drop any device created for a previous adapter
+    if(DEVICE_PASS != Init())  // Init() returns DEVICE_PASS / DEVICE_FAIL, so compare explicitly instead of testing for zero
+    {
+      _status = DEVICE_FAIL;
+      _adapterFound = false;  // a failed Init() means no usable adapter, same as CD3D9Wrapper::AdapterNext()
+    }
+    break;  // stop at the first adapter with HAL caps, whether or not Init() succeeded
+  }
+  return _adapterFound;
+unsigned int CD3D9ExWrapper::AdapterIdx() const  // D3D ordinal of the current adapter.
+  return _adapterIdx - 1;  // _adapterIdx is unsigned and starts at 0, so this wraps around if called before the first AdapterNext()
+_dxvaDevice(NULL), _status(DEVICE_PASS), _adapterFound(false)  // member-initializer list; the constructor's signature line is not visible here
+  _status = _d3d9.Status();  // start from the status of the embedded CD3D9ExWrapper
+  DXVAHDDestroy();  // NOTE(review): looks like the destructor body (its signature line is not visible) - confirm
+void * CDXVAWrapper::Device() const  // The DXVA-HD device pointer, returned as void*; NULL when none is currently created.
+  return _dxvaDevice;
+TDeviceStatus CDXVAWrapper::Status() const  // Combines this wrapper's status with the embedded D3D9Ex wrapper's: FAIL takes precedence over NOTSUPPORTED, which takes precedence over PASS.
+    if(_status == DEVICE_FAIL || _d3d9.Status() == DEVICE_FAIL)
+        return DEVICE_FAIL;
+    else if(_status == DEVICE_NOTSUPPORTED || _d3d9.Status() == DEVICE_NOTSUPPORTED)
+        return DEVICE_NOTSUPPORTED;
+    else
+        return DEVICE_PASS;
+bool CDXVAWrapper::AdapterNext()  // Advances the embedded D3D9Ex wrapper to its next adapter, then recreates the DXVA-HD device on it; true only if both steps succeed.
+  if (DEVICE_PASS != _status)
+    return false;  // any earlier non-PASS status stops the enumeration
+  _adapterFound = _d3d9.AdapterNext();
+  _status = _d3d9.Status();  // mirror the D3D9Ex wrapper's status after the move
+  if (DEVICE_PASS != _status)
+  {
+    _adapterFound = false;
+    return false;
+  }
+  if (!_adapterFound)
+    return false;  // no more adapters; _status stays DEVICE_PASS
+  DXVAHDDestroy();  // drop the DXVA-HD device of the previous adapter
+  _status = DXVAHDInit();
+  if (DEVICE_PASS != _status)
+  {
+    _adapterFound = false;
+    return false;
+  }
+  return true;
+TDeviceStatus CDXVAWrapper::DXVAHDInit()  // Creates the DXVA-HD device on top of the embedded D3D9Ex device and returns the resulting status.
+  if ((_status == DEVICE_FAIL) || (_d3d9.Status() == DEVICE_FAIL) || !_adapterFound)
+    return DEVICE_FAIL;
+  desc.InputFrameRate = fps;  // the declarations of desc and fps are not visible in this chunk
+  desc.InputWidth = WindowWidth();
+  desc.InputHeight = WindowHeight();
+  desc.OutputFrameRate = fps;  // output description mirrors the input: same rate and window-sized frames
+  desc.OutputWidth = WindowWidth();
+  desc.OutputHeight = WindowHeight();
+  _status = DEVICE_FAIL;  // NOTE(review): as shown, this and the return below are unconditional and make the rest unreachable; presumably they belong to a preprocessor branch not visible here - confirm
+  return DEVICE_FAIL;
+  HRESULT hr = DXVAHD_CreateDevice(static_cast<IDirect3DDevice9Ex *>(_d3d9.Device()),
+  if(FAILED(hr))
+  {
+    if (hr == E_NOINTERFACE)  // this HRESULT is reported as "not supported" rather than as a failure
+    {
+      log_error("DXVAHD_CreateDevice skipped due to no supported devices!\n");
+      _status = DEVICE_NOTSUPPORTED;
+    }
+    else
+    {
+    log_error("DXVAHD_CreateDevice failed\n");
+    _status = DEVICE_FAIL;
+    }
+  }
+  return _status;  // unchanged from entry when DXVAHD_CreateDevice succeeded
+void CDXVAWrapper::DXVAHDDestroy()  // Releases the DXVA-HD device, if any, and clears the pointer.
+  if (_dxvaDevice)
+    _dxvaDevice->Release();
+  _dxvaDevice = 0;
+void * CDXVAWrapper::D3D() const  // Forwards to the embedded D3D9Ex wrapper.
+  return _d3d9.D3D();
+unsigned int CDXVAWrapper::AdapterIdx() const  // Forwards to the embedded D3D9Ex wrapper.
+  return _d3d9.AdapterIdx();
+const CD3D9ExWrapper & CDXVAWrapper::D3D9() const  // Read-only access to the embedded D3D9Ex wrapper.
+  return _d3d9;
+CD3D9SurfaceWrapper::CD3D9SurfaceWrapper( IDirect3DSurface9* mem ):  // constructor taking an existing surface pointer; its initializer list is not visible here
+  if(mMem != NULL)  // NOTE(review): these lines look like the destructor body (its signature line is not visible) - confirm
+    mMem->Release();
+  mMem = NULL;
diff --git a/test_extensions/media_sharing/wrappers.h b/test_extensions/media_sharing/wrappers.h
new file mode 100644
index 0000000..45b7032
--- /dev/null
+++ b/test_extensions/media_sharing/wrappers.h
@@ -0,0 +1,197 @@
+// Copyright (c) 2017 The Khronos Group Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef __WRAPPERS_H
+#define __WRAPPERS_H
+#if defined(_WIN32)
+#include <d3d9.h>
+#if defined (__MINGW32__)
+#include <rpcsal.h>
+typedef unsigned char UINT8;
+#define __out
+#define __in
+#define __inout
+#define __out_bcount(size)
+#define __out_bcount_opt(size)
+#define __in_opt
+#define __in_ecount(size)
+#define __in_ecount_opt(size)
+#define __out_opt
+#define __out_ecount(size)
+#define __out_ecount_opt(size)
+#define __in_bcount_opt(size)
+#define __inout_opt
+#define __inout_bcount(size)
+#define __in_bcount(size)
+#define __deref_out
+#include <dxvahd.h>
+#include <tchar.h>
+enum TDeviceStatus
+class CDeviceWrapper {  // Abstract base of the device wrappers; on _WIN32 it also holds the helper window.
+  enum TAccelerationType
+  {
+  };  // NOTE(review): the enumerators are not visible here; wrappers.cpp uses ACCELERATION_HW
+  CDeviceWrapper();
+  virtual ~CDeviceWrapper();
+  virtual bool AdapterNext() = 0;  // advance to the next adapter; the implementations return false when none could be set up
+  virtual unsigned int AdapterIdx() const = 0;  // index of the current adapter
+  virtual void *Device() const = 0;  // device pointer as void*
+  virtual TDeviceStatus Status() const = 0;
+  virtual void *D3D() const = 0;  // D3D interface pointer as void*
+#if defined(_WIN32)
+  HWND WindowHandle() const;
+  int WindowWidth() const;
+  int WindowHeight() const;
+  void WindowInit();
+  static TAccelerationType AccelerationType();  // getter for accelerationType
+  static void AccelerationType(TAccelerationType accelerationTypeNew);  // setter for accelerationType
+  static const int WINDOW_WIDTH;
+  static const int WINDOW_HEIGHT;
+  static TAccelerationType accelerationType;  // static, so shared by all wrapper instances
+#if defined(_WIN32)
+  HMODULE _hInstance;  // module handle obtained in WindowInit()
+  HWND _hWnd;  // helper window; NULL when not created
+  void WindowDestroy();
+class CSurfaceWrapper  // Base class of the surface wrappers (CD3D9SurfaceWrapper derives from it).
+  CSurfaceWrapper();
+  virtual ~CSurfaceWrapper();  // virtual destructor
+#if defined(_WIN32)
+//windows specific wrappers
+class CD3D9Wrapper: public CDeviceWrapper {  // CDeviceWrapper implementation on top of IDirect3D9 / IDirect3DDevice9.
+  CD3D9Wrapper();
+  ~CD3D9Wrapper();
+  virtual bool AdapterNext();
+  virtual unsigned int AdapterIdx() const;
+  virtual void *Device() const;
+  virtual TDeviceStatus Status() const;
+  virtual void *D3D() const;
+  LPDIRECT3D9 _d3d9;  // created in the constructor with Direct3DCreate9
+  LPDIRECT3DDEVICE9 _d3dDevice;  // created by Init(), released by Destroy()
+  TDeviceStatus _status;
+  unsigned int _adapterIdx;  // one past the ordinal of the current adapter; 0 before the first AdapterNext()
+  bool _adapterFound;
+  D3DFORMAT Format();
+  int Init();  // the result is tested as a boolean by AdapterNext()
+  void Destroy();
+class CD3D9ExWrapper: public CDeviceWrapper {  // CDeviceWrapper implementation on top of IDirect3D9Ex.
+  CD3D9ExWrapper();
+  ~CD3D9ExWrapper();
+  virtual bool AdapterNext();
+  virtual unsigned int AdapterIdx() const;
+  virtual void *Device() const;
+  virtual TDeviceStatus Status() const;
+  virtual void *D3D() const;
+  LPDIRECT3D9EX _d3d9Ex;  // created in the constructor with Direct3DCreate9Ex
+  TDeviceStatus _status;
+  unsigned int _adapterIdx;  // one past the ordinal of the current adapter; 0 before the first AdapterNext()
+  bool _adapterFound;
+  D3DFORMAT Format();
+  int Init();  // returns DEVICE_PASS or DEVICE_FAIL
+  void Destroy();
+class CDXVAWrapper: public CDeviceWrapper {  // CDeviceWrapper implementation that layers a DXVA-HD device on an embedded CD3D9ExWrapper.
+  CDXVAWrapper();
+  ~CDXVAWrapper();
+  virtual bool AdapterNext();
+  virtual unsigned int AdapterIdx() const;  // forwarded to _d3d9
+  virtual void *Device() const;  // the DXVA-HD device, not the D3D9Ex one
+  virtual TDeviceStatus Status() const;  // combined status of this wrapper and _d3d9
+  virtual void *D3D() const;  // forwarded to _d3d9
+  const CD3D9ExWrapper &D3D9() const;
+  CD3D9ExWrapper _d3d9;  // owned by value
+  IDXVAHD_Device *_dxvaDevice;  // created by DXVAHDInit(), released by DXVAHDDestroy()
+  TDeviceStatus _status;
+  bool _adapterFound;
+  static const D3DFORMAT VIDEO_FORMAT;
+  static const unsigned int VIDEO_FPS;  // defined as 60 in wrappers.cpp
+  TDeviceStatus DXVAHDInit();
+  void DXVAHDDestroy();
+class CD3D9SurfaceWrapper: public CSurfaceWrapper  // Thin holder for an IDirect3DSurface9 pointer.
+  CD3D9SurfaceWrapper();
+  CD3D9SurfaceWrapper( IDirect3DSurface9* mem );
+  ~CD3D9SurfaceWrapper();
+  operator IDirect3DSurface9*() { return mMem; }  // implicit conversion to the raw surface pointer
+  IDirect3DSurface9* * operator&() { return &mMem; }  // overloaded address-of: yields the address of the wrapped pointer, not of the wrapper object
+  IDirect3DSurface9* operator->() const { return mMem; }  // member access forwards to the surface
+  IDirect3DSurface9* mMem;  // the wrapped surface pointer
+#endif  // __WRAPPERS_H
diff --git a/ b/
new file mode 100755
index 0000000..c7ad707
--- /dev/null
+++ b/
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+set -e
+export TOP=$(pwd)
+if [ "${JOB_CHECK_FORMAT}" -eq 1 ]; then
+    ./
+    exit $?
+# Prepare toolchain if needed
+if [[ ${JOB_ARCHITECTURE} != "" ]]; then
+    wget ${TOOLCHAIN_URL}
+    tar xf ${TOOLCHAIN_ARCHIVE}
+    export PATH=${TOOLCHAIN_DIR}/bin:${PATH}
+# Prepare headers
+git clone
+cd OpenCL-Headers
+ln -s CL OpenCL # For OSX builds
+cd ..
+# Get and build loader
+git clone
+cd ${TOP}/OpenCL-ICD-Loader
+mkdir build
+cd build
+# Get libclcxx
+cd ${TOP}
+git clone
+# Build CTS
+ls -l
+mkdir build
+cd build
+cmake -DCL_INCLUDE_DIR=${TOP}/OpenCL-Headers \
+      -DCL_LIB_DIR=${TOP}/OpenCL-ICD-Loader/build \
+      -DCL_LIBCLCXX_DIR=${TOP}/libclcxx \
+      -DOPENCL_LIBRARIES="-lOpenCL -lpthread" \
+      ..
+make -j2